1 /**********************************************************
2  * Copyright 1998-2013 VMware, Inc.  All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person
5  * obtaining a copy of this software and associated documentation
6  * files (the "Software"), to deal in the Software without
7  * restriction, including without limitation the rights to use, copy,
8  * modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **********************************************************/
25 
26 /**
27  * @file svga_tgsi_vgpu10.c
28  *
29  * TGSI -> VGPU10 shader translation.
30  *
31  * \author Mingcheng Chen
32  * \author Brian Paul
33  */
34 
35 #include "pipe/p_compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_build.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_scan.h"
43 #include "tgsi/tgsi_strings.h"
44 #include "tgsi/tgsi_two_side.h"
45 #include "tgsi/tgsi_aa_point.h"
46 #include "tgsi/tgsi_util.h"
47 #include "util/u_math.h"
48 #include "util/u_memory.h"
49 #include "util/u_bitmask.h"
50 #include "util/u_debug.h"
51 #include "util/u_pstipple.h"
52 
53 #include "svga_context.h"
54 #include "svga_debug.h"
55 #include "svga_link.h"
56 #include "svga_shader.h"
57 #include "svga_tgsi.h"
58 
59 #include "VGPU10ShaderTokens.h"
60 
61 
/** Sentinel meaning "no register/index assigned". */
#define INVALID_INDEX 99999

/** Maximum number of internal temporary registers. */
#define MAX_INTERNAL_TEMPS 3

/** Size of the TGSI system value -> VGPU10 input index map. */
#define MAX_SYSTEM_VALUES 4

/** Maximum number of four-component immediates. */
#define MAX_IMMEDIATE_COUNT (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)

/** Maximum number of temporary register arrays.  Enough? */
#define MAX_TEMP_ARRAYS 64
68 
69 
/**
 * Clipping is complicated.  There are four different cases which we
 * handle during VS/GS shader translation; each enumerator below is
 * one of them.
 */
enum clipping_mode
{
   /** No clipping enabled. */
   CLIP_NONE,

   /**
    * The shader has no clipping declarations or code but one or more
    * user-defined clip planes are enabled.  We generate extra code to
    * emit clip distances.
    */
   CLIP_LEGACY,

   /**
    * The shader already declares clip distance output registers and
    * has code to write to them.
    */
   CLIP_DISTANCE,

   /**
    * The shader declares a clip vertex output register and has code
    * that writes to the register.  We convert the clipvertex position
    * into one or more clip distances.
    */
   CLIP_VERTEX
};
89 
90 
91 /* Shader signature info */
92 struct svga_shader_signature
93 {
94    SVGA3dDXShaderSignatureHeader header;
95    SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
96    SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
97    SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
98 };
99 
100 static inline void
set_shader_signature_entry(SVGA3dDXShaderSignatureEntry * e,unsigned index,SVGA3dDXSignatureSemanticName sgnName,unsigned mask,SVGA3dDXSignatureRegisterComponentType compType,SVGA3dDXSignatureMinPrecision minPrecision)101 set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
102                            unsigned index,
103                            SVGA3dDXSignatureSemanticName sgnName,
104                            unsigned mask,
105                            SVGA3dDXSignatureRegisterComponentType compType,
106                            SVGA3dDXSignatureMinPrecision minPrecision)
107 {
108    e->registerIndex = index;
109    e->semanticName = sgnName;
110    e->mask = mask;
111    e->componentType = compType;
112    e->minPrecision = minPrecision;
113 };
114 
115 static const SVGA3dDXSignatureSemanticName
116 tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
117    SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
118    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
119    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
120    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
121    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
122    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
123    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
124    SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
125    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
126    SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
127    SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
128    SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
129    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
130    SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
131    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
132    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
133    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
134    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
135    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
136    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
137    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
138    SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
139    SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
140    SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
141    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
142    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
143    SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
144    SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
145    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
146    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
147    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
148    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
149    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
150    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
151    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
152    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
153    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
154    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
155    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
156    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
157    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
158    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
159    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
160    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
161    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
162 };
163 
164 
165 /**
166  * Map tgsi semantic name to SVGA signature semantic name
167  */
168 static inline SVGA3dDXSignatureSemanticName
map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)169 map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
170 {
171    assert(name < TGSI_SEMANTIC_COUNT);
172 
173    /* Do a few asserts here to spot check the mapping */
174    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
175           SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
176    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
177           SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
178    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
179           SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
180 
181    return tgsi_semantic_to_sgn_name[name];
182 }
183 
184 
185 struct svga_shader_emitter_v10
186 {
187    /* The token output buffer */
188    unsigned size;
189    char *buf;
190    char *ptr;
191 
192    /* Information about the shader and state (does not change) */
193    struct svga_compile_key key;
194    struct tgsi_shader_info info;
195    unsigned unit;
196    unsigned version; /**< Either 40 or 41 at this time */
197 
198    unsigned cur_tgsi_token;     /**< current tgsi token position */
199    unsigned inst_start_token;
200    boolean discard_instruction; /**< throw away current instruction? */
201    boolean reemit_instruction;  /**< reemit current instruction */
202    boolean skip_instruction;    /**< skip current instruction */
203 
204    union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
205    double (*immediates_dbl)[2];
206    unsigned num_immediates;      /**< Number of immediates emitted */
207    unsigned common_immediate_pos[10];  /**< literals for common immediates */
208    unsigned num_common_immediates;
209    boolean immediates_emitted;
210 
211    unsigned num_outputs;      /**< include any extra outputs */
212                               /**  The first extra output is reserved for
213                                *   non-adjusted vertex position for
214                                *   stream output purpose
215                                */
216 
217    /* Temporary Registers */
218    unsigned num_shader_temps; /**< num of temps used by original shader */
219    unsigned internal_temp_count;  /**< currently allocated internal temps */
220    struct {
221       unsigned start, size;
222    } temp_arrays[MAX_TEMP_ARRAYS];
223    unsigned num_temp_arrays;
224 
225    /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
226    struct {
227       unsigned arrayId, index;
228       boolean initialized;
229    } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
230 
231    unsigned initialize_temp_index;
232 
233    /** Number of constants used by original shader for each constant buffer.
234     * The size should probably always match with that of svga_state.constbufs.
235     */
236    unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
237 
238    /* Samplers */
239    unsigned num_samplers;
240    boolean sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
241    ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
242    ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */
243 
244    /* Index Range declaration */
245    struct {
246       unsigned start_index;
247       unsigned count;
248       boolean required;
249       unsigned operandType;
250       unsigned size;
251       unsigned dim;
252    } index_range;
253 
254    /* Address regs (really implemented with temps) */
255    unsigned num_address_regs;
256    unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
257 
258    /* Output register usage masks */
259    ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
260 
261    /* To map TGSI system value index to VGPU shader input indexes */
262    ubyte system_value_indexes[MAX_SYSTEM_VALUES];
263 
264    struct {
265       /* vertex position scale/translation */
266       unsigned out_index;  /**< the real position output reg */
267       unsigned tmp_index;  /**< the fake/temp position output reg */
268       unsigned so_index;   /**< the non-adjusted position output reg */
269       unsigned prescale_cbuf_index;  /* index to the const buf for prescale */
270       unsigned prescale_scale_index, prescale_trans_index;
271       unsigned num_prescale;      /* number of prescale factor in const buf */
272       unsigned viewport_index;
273       unsigned need_prescale:1;
274       unsigned have_prescale:1;
275    } vposition;
276 
277    /* For vertex shaders only */
278    struct {
279       /* viewport constant */
280       unsigned viewport_index;
281 
282       unsigned vertex_id_bias_index;
283       unsigned vertex_id_sys_index;
284       unsigned vertex_id_tmp_index;
285 
286       /* temp index of adjusted vertex attributes */
287       unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
288    } vs;
289 
290    /* For fragment shaders only */
291    struct {
292       unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
293       unsigned num_color_outputs;
294       unsigned color_tmp_index;  /**< fake/temp color output reg */
295       unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
296 
297       /* front-face */
298       unsigned face_input_index; /**< real fragment shader face reg (bool) */
299       unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */
300 
301       unsigned pstipple_sampler_unit;
302 
303       unsigned fragcoord_input_index;  /**< real fragment position input reg */
304       unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
305 
306       unsigned sample_id_sys_index;  /**< TGSI index of sample id sys value */
307 
308       unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
309       unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
310 
311       /** TGSI index of sample mask input sys value */
312       unsigned sample_mask_in_sys_index;
313 
314       /** Which texture units are doing shadow comparison in the FS code */
315       unsigned shadow_compare_units;
316 
317       /* layer */
318       unsigned layer_input_index;    /**< TGSI index of layer */
319       unsigned layer_imm_index;      /**< immediate for default layer 0 */
320    } fs;
321 
322    /* For geometry shaders only */
323    struct {
324       VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
325       VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
326       unsigned input_size;       /**< size of input arrays */
327       unsigned prim_id_index;    /**< primitive id register index */
328       unsigned max_out_vertices; /**< maximum number of output vertices */
329       unsigned invocations;
330       unsigned invocation_id_sys_index;
331 
332       unsigned viewport_index_out_index;
333       unsigned viewport_index_tmp_index;
334    } gs;
335 
336    /* For tessellation control shaders only */
337    struct {
338       unsigned vertices_per_patch_index;     /**< vertices_per_patch system value index */
339       unsigned imm_index;                    /**< immediate for tcs */
340       unsigned invocation_id_sys_index;      /**< invocation id */
341       unsigned invocation_id_tmp_index;
342       unsigned instruction_token_pos;        /* token pos for the first instruction */
343       unsigned control_point_input_index;    /* control point input register index */
344       unsigned control_point_addr_index;     /* control point input address register */
345       unsigned control_point_out_index;      /* control point output register index */
346       unsigned control_point_tmp_index;      /* control point temporary register */
347       unsigned control_point_out_count;      /* control point output count */
348       boolean  control_point_phase;          /* true if in control point phase */
349       boolean  fork_phase_add_signature;     /* true if needs to add signature in fork phase */
350       unsigned patch_generic_out_count;      /* per-patch generic output count */
351       unsigned patch_generic_out_index;      /* per-patch generic output register index*/
352       unsigned patch_generic_tmp_index;      /* per-patch generic temporary register index*/
353       unsigned prim_id_index;                /* primitive id */
354       struct {
355          unsigned out_index;      /* real tessinner output register */
356          unsigned temp_index;     /* tessinner temp register */
357          unsigned tgsi_index;     /* tgsi tessinner output register */
358       } inner;
359       struct {
360          unsigned out_index;      /* real tessouter output register */
361          unsigned temp_index;     /* tessouter temp register */
362          unsigned tgsi_index;     /* tgsi tessouter output register */
363       } outer;
364    } tcs;
365 
366    /* For tessellation evaluation shaders only */
367    struct {
368       enum pipe_prim_type prim_mode;
369       enum pipe_tess_spacing spacing;
370       boolean vertices_order_cw;
371       boolean point_mode;
372       unsigned tesscoord_sys_index;
373       unsigned prim_id_index;                /* primitive id */
374       struct {
375          unsigned in_index;       /* real tessinner input register */
376          unsigned temp_index;     /* tessinner temp register */
377          unsigned tgsi_index;     /* tgsi tessinner input register */
378       } inner;
379       struct {
380          unsigned in_index;       /* real tessouter input register */
381          unsigned temp_index;     /* tessouter temp register */
382          unsigned tgsi_index;     /* tgsi tessouter input register */
383       } outer;
384    } tes;
385 
386    /* For vertex or geometry shaders */
387    enum clipping_mode clip_mode;
388    unsigned clip_dist_out_index; /**< clip distance output register index */
389    unsigned clip_dist_tmp_index; /**< clip distance temporary register */
390    unsigned clip_dist_so_index;  /**< clip distance shadow copy */
391 
392    /** Index of temporary holding the clipvertex coordinate */
393    unsigned clip_vertex_out_index; /**< clip vertex output register index */
394    unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
395 
396    /* user clip plane constant slot indexes */
397    unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
398 
399    unsigned num_output_writes;
400    boolean constant_color_output;
401 
402    boolean uses_flat_interp;
403 
404    unsigned reserved_token;        /* index to the reserved token */
405    boolean uses_precise_qualifier;
406 
407    /* For all shaders: const reg index for RECT coord scaling */
408    unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
409 
410    /* For all shaders: const reg index for texture buffer size */
411    unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
412 
413    /* VS/TCS/TES/GS/FS Linkage info */
414    struct shader_linkage linkage;
415    struct tgsi_shader_info *prevShaderInfo;
416 
417    /* Shader signature */
418    struct svga_shader_signature signature;
419 
420    bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
421 
422    /* For pipe_debug_message */
423    struct pipe_debug_callback svga_debug_callback;
424 
425    /* current loop depth in shader */
426    unsigned current_loop_depth;
427 };
428 
429 
430 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
431 static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
432 static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
433 static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
434 static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
435 static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
436 static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
437 static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
438 static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
439 
440 static boolean
441 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
442 
443 static boolean
444 emit_vertex(struct svga_shader_emitter_v10 *emit,
445             const struct tgsi_full_instruction *inst);
446 
447 static boolean
448 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
449                         unsigned inst_number,
450                         const struct tgsi_full_instruction *inst);
451 
452 static void
453 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
454                        unsigned opcodeType, unsigned operandType,
455                        unsigned dim, unsigned index, unsigned size,
456                        unsigned name, unsigned numComp,
457                        unsigned selMode, unsigned usageMask,
458                        unsigned interpMode,
459                        boolean addSignature,
460                        SVGA3dDXSignatureSemanticName sgnName);
461 
462 static void
463 create_temp_array(struct svga_shader_emitter_v10 *emit,
464                   unsigned arrayID, unsigned first, unsigned count,
465                   unsigned startIndex);
466 
467 static char err_buf[128];
468 
469 static boolean
expand(struct svga_shader_emitter_v10 * emit)470 expand(struct svga_shader_emitter_v10 *emit)
471 {
472    char *new_buf;
473    unsigned newsize = emit->size * 2;
474 
475    if (emit->buf != err_buf)
476       new_buf = REALLOC(emit->buf, emit->size, newsize);
477    else
478       new_buf = NULL;
479 
480    if (!new_buf) {
481       emit->ptr = err_buf;
482       emit->buf = err_buf;
483       emit->size = sizeof(err_buf);
484       return FALSE;
485    }
486 
487    emit->size = newsize;
488    emit->ptr = new_buf + (emit->ptr - emit->buf);
489    emit->buf = new_buf;
490    return TRUE;
491 }
492 
493 /**
494  * Create and initialize a new svga_shader_emitter_v10 object.
495  */
496 static struct svga_shader_emitter_v10 *
alloc_emitter(void)497 alloc_emitter(void)
498 {
499    struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
500 
501    if (!emit)
502       return NULL;
503 
504    /* to initialize the output buffer */
505    emit->size = 512;
506    if (!expand(emit)) {
507       FREE(emit);
508       return NULL;
509    }
510    return emit;
511 }
512 
513 /**
514  * Free an svga_shader_emitter_v10 object.
515  */
516 static void
free_emitter(struct svga_shader_emitter_v10 * emit)517 free_emitter(struct svga_shader_emitter_v10 *emit)
518 {
519    assert(emit);
520    FREE(emit->buf);    /* will be NULL if translation succeeded */
521    FREE(emit);
522 }
523 
524 static inline boolean
reserve(struct svga_shader_emitter_v10 * emit,unsigned nr_dwords)525 reserve(struct svga_shader_emitter_v10 *emit,
526         unsigned nr_dwords)
527 {
528    while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
529       if (!expand(emit))
530          return FALSE;
531    }
532 
533    return TRUE;
534 }
535 
536 static boolean
emit_dword(struct svga_shader_emitter_v10 * emit,uint32 dword)537 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
538 {
539    if (!reserve(emit, 1))
540       return FALSE;
541 
542    *(uint32 *)emit->ptr = dword;
543    emit->ptr += sizeof dword;
544    return TRUE;
545 }
546 
547 static boolean
emit_dwords(struct svga_shader_emitter_v10 * emit,const uint32 * dwords,unsigned nr)548 emit_dwords(struct svga_shader_emitter_v10 *emit,
549             const uint32 *dwords,
550             unsigned nr)
551 {
552    if (!reserve(emit, nr))
553       return FALSE;
554 
555    memcpy(emit->ptr, dwords, nr * sizeof *dwords);
556    emit->ptr += nr * sizeof *dwords;
557    return TRUE;
558 }
559 
560 /** Return the number of tokens in the emitter's buffer */
561 static unsigned
emit_get_num_tokens(const struct svga_shader_emitter_v10 * emit)562 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
563 {
564    return (emit->ptr - emit->buf) / sizeof(unsigned);
565 }
566 
567 
568 /**
569  * Check for register overflow.  If we overflow we'll set an
570  * error flag.  This function can be called for register declarations
571  * or use as src/dst instruction operands.
572  * \param type  register type.  One of VGPU10_OPERAND_TYPE_x
573                 or VGPU10_OPCODE_DCL_x
574  * \param index  the register index
575  */
576 static void
check_register_index(struct svga_shader_emitter_v10 * emit,unsigned operandType,unsigned index)577 check_register_index(struct svga_shader_emitter_v10 *emit,
578                      unsigned operandType, unsigned index)
579 {
580    bool overflow_before = emit->register_overflow;
581 
582    switch (operandType) {
583    case VGPU10_OPERAND_TYPE_TEMP:
584    case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
585    case VGPU10_OPCODE_DCL_TEMPS:
586       if (index >= VGPU10_MAX_TEMPS) {
587          emit->register_overflow = TRUE;
588       }
589       break;
590    case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
591    case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
592       if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
593          emit->register_overflow = TRUE;
594       }
595       break;
596    case VGPU10_OPERAND_TYPE_INPUT:
597    case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
598    case VGPU10_OPCODE_DCL_INPUT:
599    case VGPU10_OPCODE_DCL_INPUT_SGV:
600    case VGPU10_OPCODE_DCL_INPUT_SIV:
601    case VGPU10_OPCODE_DCL_INPUT_PS:
602    case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
603    case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
604       if ((emit->unit == PIPE_SHADER_VERTEX &&
605            index >= VGPU10_MAX_VS_INPUTS) ||
606           (emit->unit == PIPE_SHADER_GEOMETRY &&
607            index >= VGPU10_MAX_GS_INPUTS) ||
608           (emit->unit == PIPE_SHADER_FRAGMENT &&
609            index >= VGPU10_MAX_FS_INPUTS) ||
610           (emit->unit == PIPE_SHADER_TESS_CTRL &&
611            index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
612           (emit->unit == PIPE_SHADER_TESS_EVAL &&
613            index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
614          emit->register_overflow = TRUE;
615       }
616       break;
617    case VGPU10_OPERAND_TYPE_OUTPUT:
618    case VGPU10_OPCODE_DCL_OUTPUT:
619    case VGPU10_OPCODE_DCL_OUTPUT_SGV:
620    case VGPU10_OPCODE_DCL_OUTPUT_SIV:
621       /* Note: we are skipping two output indices in tcs for
622        * tessinner/outer levels. Implementation will not exceed
623        * number of output count but it allows index to go beyond
624        * VGPU11_MAX_HS_OUTPUTS.
625        * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2
626        */
627       if ((emit->unit == PIPE_SHADER_VERTEX &&
628            index >= VGPU10_MAX_VS_OUTPUTS) ||
629           (emit->unit == PIPE_SHADER_GEOMETRY &&
630            index >= VGPU10_MAX_GS_OUTPUTS) ||
631           (emit->unit == PIPE_SHADER_FRAGMENT &&
632            index >= VGPU10_MAX_FS_OUTPUTS) ||
633           (emit->unit == PIPE_SHADER_TESS_CTRL &&
634            index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
635           (emit->unit == PIPE_SHADER_TESS_EVAL &&
636            index >= VGPU11_MAX_DS_OUTPUTS)) {
637          emit->register_overflow = TRUE;
638       }
639       break;
640    case VGPU10_OPERAND_TYPE_SAMPLER:
641    case VGPU10_OPCODE_DCL_SAMPLER:
642       if (index >= VGPU10_MAX_SAMPLERS) {
643          emit->register_overflow = TRUE;
644       }
645       break;
646    case VGPU10_OPERAND_TYPE_RESOURCE:
647    case VGPU10_OPCODE_DCL_RESOURCE:
648       if (index >= VGPU10_MAX_RESOURCES) {
649          emit->register_overflow = TRUE;
650       }
651       break;
652    case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
653       if (index >= MAX_IMMEDIATE_COUNT) {
654          emit->register_overflow = TRUE;
655       }
656       break;
657    case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
658       /* nothing */
659       break;
660    default:
661       assert(0);
662       ; /* nothing */
663    }
664 
665    if (emit->register_overflow && !overflow_before) {
666       debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
667                    operandType, index);
668    }
669 }
670 
671 
672 /**
673  * Examine misc state to determine the clipping mode.
674  */
675 static void
determine_clipping_mode(struct svga_shader_emitter_v10 * emit)676 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
677 {
678    /* num_written_clipdistance in the shader info for tessellation
679     * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
680     * is not defined for this shader. So we go through all the output declarations
681     * to set the num_written_clipdistance. This is just to determine the
682     * clipping mode.
683     */
684    if (emit->unit == PIPE_SHADER_TESS_CTRL) {
685       unsigned i;
686       for (i = 0; i < emit->info.num_outputs; i++) {
687          if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
688             emit->info.num_written_clipdistance =
689                4 * (emit->info.output_semantic_index[i] + 1);
690          }
691       }
692    }
693 
694    if (emit->info.num_written_clipdistance > 0) {
695       emit->clip_mode = CLIP_DISTANCE;
696    }
697    else if (emit->info.writes_clipvertex) {
698       emit->clip_mode = CLIP_VERTEX;
699    }
700    else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
701       /*
702        * Only the last shader in the vertex processing stage needs to
703        * handle the legacy clip mode.
704        */
705       emit->clip_mode = CLIP_LEGACY;
706    }
707    else {
708       emit->clip_mode = CLIP_NONE;
709    }
710 }
711 
712 
713 /**
714  * For clip distance register declarations and clip distance register
715  * writes we need to mask the declaration usage or instruction writemask
716  * (respectively) against the set of the really-enabled clipping planes.
717  *
718  * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
719  * has a VS that writes to all 8 clip distance registers, but the plane enable
720  * flags are a subset of that.
721  *
722  * This function is used to apply the plane enable flags to the register
723  * declaration or instruction writemask.
724  *
725  * \param writemask  the declaration usage mask or instruction writemask
726  * \param clip_reg_index  which clip plane register is being declared/written.
727  *                        The legal values are 0 and 1 (two clip planes per
728  *                        register, for a total of 8 clip planes)
729  */
730 static unsigned
apply_clip_plane_mask(struct svga_shader_emitter_v10 * emit,unsigned writemask,unsigned clip_reg_index)731 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
732                       unsigned writemask, unsigned clip_reg_index)
733 {
734    unsigned shift;
735 
736    assert(clip_reg_index < 2);
737 
738    /* four clip planes per clip register: */
739    shift = clip_reg_index * 4;
740    writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
741 
742    return writemask;
743 }
744 
745 
746 /**
747  * Translate gallium shader type into VGPU10 type.
748  */
749 static VGPU10_PROGRAM_TYPE
translate_shader_type(unsigned type)750 translate_shader_type(unsigned type)
751 {
752    switch (type) {
753    case PIPE_SHADER_VERTEX:
754       return VGPU10_VERTEX_SHADER;
755    case PIPE_SHADER_GEOMETRY:
756       return VGPU10_GEOMETRY_SHADER;
757    case PIPE_SHADER_FRAGMENT:
758       return VGPU10_PIXEL_SHADER;
759    case PIPE_SHADER_TESS_CTRL:
760       return VGPU10_HULL_SHADER;
761    case PIPE_SHADER_TESS_EVAL:
762       return VGPU10_DOMAIN_SHADER;
763    case PIPE_SHADER_COMPUTE:
764       return VGPU10_COMPUTE_SHADER;
765    default:
766       assert(!"Unexpected shader type");
767       return VGPU10_VERTEX_SHADER;
768    }
769 }
770 
771 
772 /**
773  * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
774  * Note: we only need to translate the opcodes for "simple" instructions,
775  * as seen below.  All other opcodes are handled/translated specially.
776  */
777 static VGPU10_OPCODE_TYPE
translate_opcode(enum tgsi_opcode opcode)778 translate_opcode(enum tgsi_opcode opcode)
779 {
780    switch (opcode) {
781    case TGSI_OPCODE_MOV:
782       return VGPU10_OPCODE_MOV;
783    case TGSI_OPCODE_MUL:
784       return VGPU10_OPCODE_MUL;
785    case TGSI_OPCODE_ADD:
786       return VGPU10_OPCODE_ADD;
787    case TGSI_OPCODE_DP3:
788       return VGPU10_OPCODE_DP3;
789    case TGSI_OPCODE_DP4:
790       return VGPU10_OPCODE_DP4;
791    case TGSI_OPCODE_MIN:
792       return VGPU10_OPCODE_MIN;
793    case TGSI_OPCODE_MAX:
794       return VGPU10_OPCODE_MAX;
795    case TGSI_OPCODE_MAD:
796       return VGPU10_OPCODE_MAD;
797    case TGSI_OPCODE_SQRT:
798       return VGPU10_OPCODE_SQRT;
799    case TGSI_OPCODE_FRC:
800       return VGPU10_OPCODE_FRC;
801    case TGSI_OPCODE_FLR:
802       return VGPU10_OPCODE_ROUND_NI;
803    case TGSI_OPCODE_FSEQ:
804       return VGPU10_OPCODE_EQ;
805    case TGSI_OPCODE_FSGE:
806       return VGPU10_OPCODE_GE;
807    case TGSI_OPCODE_FSNE:
808       return VGPU10_OPCODE_NE;
809    case TGSI_OPCODE_DDX:
810       return VGPU10_OPCODE_DERIV_RTX;
811    case TGSI_OPCODE_DDY:
812       return VGPU10_OPCODE_DERIV_RTY;
813    case TGSI_OPCODE_RET:
814       return VGPU10_OPCODE_RET;
815    case TGSI_OPCODE_DIV:
816       return VGPU10_OPCODE_DIV;
817    case TGSI_OPCODE_IDIV:
818       return VGPU10_OPCODE_VMWARE;
819    case TGSI_OPCODE_DP2:
820       return VGPU10_OPCODE_DP2;
821    case TGSI_OPCODE_BRK:
822       return VGPU10_OPCODE_BREAK;
823    case TGSI_OPCODE_IF:
824       return VGPU10_OPCODE_IF;
825    case TGSI_OPCODE_ELSE:
826       return VGPU10_OPCODE_ELSE;
827    case TGSI_OPCODE_ENDIF:
828       return VGPU10_OPCODE_ENDIF;
829    case TGSI_OPCODE_CEIL:
830       return VGPU10_OPCODE_ROUND_PI;
831    case TGSI_OPCODE_I2F:
832       return VGPU10_OPCODE_ITOF;
833    case TGSI_OPCODE_NOT:
834       return VGPU10_OPCODE_NOT;
835    case TGSI_OPCODE_TRUNC:
836       return VGPU10_OPCODE_ROUND_Z;
837    case TGSI_OPCODE_SHL:
838       return VGPU10_OPCODE_ISHL;
839    case TGSI_OPCODE_AND:
840       return VGPU10_OPCODE_AND;
841    case TGSI_OPCODE_OR:
842       return VGPU10_OPCODE_OR;
843    case TGSI_OPCODE_XOR:
844       return VGPU10_OPCODE_XOR;
845    case TGSI_OPCODE_CONT:
846       return VGPU10_OPCODE_CONTINUE;
847    case TGSI_OPCODE_EMIT:
848       return VGPU10_OPCODE_EMIT;
849    case TGSI_OPCODE_ENDPRIM:
850       return VGPU10_OPCODE_CUT;
851    case TGSI_OPCODE_BGNLOOP:
852       return VGPU10_OPCODE_LOOP;
853    case TGSI_OPCODE_ENDLOOP:
854       return VGPU10_OPCODE_ENDLOOP;
855    case TGSI_OPCODE_ENDSUB:
856       return VGPU10_OPCODE_RET;
857    case TGSI_OPCODE_NOP:
858       return VGPU10_OPCODE_NOP;
859    case TGSI_OPCODE_END:
860       return VGPU10_OPCODE_RET;
861    case TGSI_OPCODE_F2I:
862       return VGPU10_OPCODE_FTOI;
863    case TGSI_OPCODE_IMAX:
864       return VGPU10_OPCODE_IMAX;
865    case TGSI_OPCODE_IMIN:
866       return VGPU10_OPCODE_IMIN;
867    case TGSI_OPCODE_UDIV:
868    case TGSI_OPCODE_UMOD:
869    case TGSI_OPCODE_MOD:
870       return VGPU10_OPCODE_UDIV;
871    case TGSI_OPCODE_IMUL_HI:
872       return VGPU10_OPCODE_IMUL;
873    case TGSI_OPCODE_INEG:
874       return VGPU10_OPCODE_INEG;
875    case TGSI_OPCODE_ISHR:
876       return VGPU10_OPCODE_ISHR;
877    case TGSI_OPCODE_ISGE:
878       return VGPU10_OPCODE_IGE;
879    case TGSI_OPCODE_ISLT:
880       return VGPU10_OPCODE_ILT;
881    case TGSI_OPCODE_F2U:
882       return VGPU10_OPCODE_FTOU;
883    case TGSI_OPCODE_UADD:
884       return VGPU10_OPCODE_IADD;
885    case TGSI_OPCODE_U2F:
886       return VGPU10_OPCODE_UTOF;
887    case TGSI_OPCODE_UCMP:
888       return VGPU10_OPCODE_MOVC;
889    case TGSI_OPCODE_UMAD:
890       return VGPU10_OPCODE_UMAD;
891    case TGSI_OPCODE_UMAX:
892       return VGPU10_OPCODE_UMAX;
893    case TGSI_OPCODE_UMIN:
894       return VGPU10_OPCODE_UMIN;
895    case TGSI_OPCODE_UMUL:
896    case TGSI_OPCODE_UMUL_HI:
897       return VGPU10_OPCODE_UMUL;
898    case TGSI_OPCODE_USEQ:
899       return VGPU10_OPCODE_IEQ;
900    case TGSI_OPCODE_USGE:
901       return VGPU10_OPCODE_UGE;
902    case TGSI_OPCODE_USHR:
903       return VGPU10_OPCODE_USHR;
904    case TGSI_OPCODE_USLT:
905       return VGPU10_OPCODE_ULT;
906    case TGSI_OPCODE_USNE:
907       return VGPU10_OPCODE_INE;
908    case TGSI_OPCODE_SWITCH:
909       return VGPU10_OPCODE_SWITCH;
910    case TGSI_OPCODE_CASE:
911       return VGPU10_OPCODE_CASE;
912    case TGSI_OPCODE_DEFAULT:
913       return VGPU10_OPCODE_DEFAULT;
914    case TGSI_OPCODE_ENDSWITCH:
915       return VGPU10_OPCODE_ENDSWITCH;
916    case TGSI_OPCODE_FSLT:
917       return VGPU10_OPCODE_LT;
918    case TGSI_OPCODE_ROUND:
919       return VGPU10_OPCODE_ROUND_NE;
920    /* Begin SM5 opcodes */
921    case TGSI_OPCODE_F2D:
922       return VGPU10_OPCODE_FTOD;
923    case TGSI_OPCODE_D2F:
924       return VGPU10_OPCODE_DTOF;
925    case TGSI_OPCODE_DMUL:
926       return VGPU10_OPCODE_DMUL;
927    case TGSI_OPCODE_DADD:
928       return VGPU10_OPCODE_DADD;
929    case TGSI_OPCODE_DMAX:
930       return VGPU10_OPCODE_DMAX;
931    case TGSI_OPCODE_DMIN:
932       return VGPU10_OPCODE_DMIN;
933    case TGSI_OPCODE_DSEQ:
934       return VGPU10_OPCODE_DEQ;
935    case TGSI_OPCODE_DSGE:
936       return VGPU10_OPCODE_DGE;
937    case TGSI_OPCODE_DSLT:
938       return VGPU10_OPCODE_DLT;
939    case TGSI_OPCODE_DSNE:
940       return VGPU10_OPCODE_DNE;
941    case TGSI_OPCODE_IBFE:
942       return VGPU10_OPCODE_IBFE;
943    case TGSI_OPCODE_UBFE:
944       return VGPU10_OPCODE_UBFE;
945    case TGSI_OPCODE_BFI:
946       return VGPU10_OPCODE_BFI;
947    case TGSI_OPCODE_BREV:
948       return VGPU10_OPCODE_BFREV;
949    case TGSI_OPCODE_POPC:
950       return VGPU10_OPCODE_COUNTBITS;
951    case TGSI_OPCODE_LSB:
952       return VGPU10_OPCODE_FIRSTBIT_LO;
953    case TGSI_OPCODE_IMSB:
954       return VGPU10_OPCODE_FIRSTBIT_SHI;
955    case TGSI_OPCODE_UMSB:
956       return VGPU10_OPCODE_FIRSTBIT_HI;
957    case TGSI_OPCODE_INTERP_CENTROID:
958       return VGPU10_OPCODE_EVAL_CENTROID;
959    case TGSI_OPCODE_INTERP_SAMPLE:
960       return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
961    case TGSI_OPCODE_BARRIER:
962       return VGPU10_OPCODE_SYNC;
963 
964    /* DX11.1 Opcodes */
965    case TGSI_OPCODE_DDIV:
966       return VGPU10_OPCODE_DDIV;
967    case TGSI_OPCODE_DRCP:
968       return VGPU10_OPCODE_DRCP;
969    case TGSI_OPCODE_D2I:
970       return VGPU10_OPCODE_DTOI;
971    case TGSI_OPCODE_D2U:
972       return VGPU10_OPCODE_DTOU;
973    case TGSI_OPCODE_I2D:
974       return VGPU10_OPCODE_ITOD;
975    case TGSI_OPCODE_U2D:
976       return VGPU10_OPCODE_UTOD;
977 
978    case TGSI_OPCODE_SAMPLE_POS:
979       /* Note: we never actually get this opcode because there's no GLSL
980        * function to query multisample resource sample positions.  There's
981        * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
982        * position of the current sample in the render target.
983        */
984       FALLTHROUGH;
985    case TGSI_OPCODE_SAMPLE_INFO:
986       /* NOTE: we never actually get this opcode because the GLSL compiler
987        * implements the gl_NumSamples variable with a simple constant in the
988        * constant buffer.
989        */
990       FALLTHROUGH;
991    default:
992       assert(!"Unexpected TGSI opcode in translate_opcode()");
993       return VGPU10_OPCODE_NOP;
994    }
995 }
996 
997 
998 /**
999  * Translate a TGSI register file type into a VGPU10 operand type.
1000  * \param array  is the TGSI_FILE_TEMPORARY register an array?
1001  */
1002 static VGPU10_OPERAND_TYPE
translate_register_file(enum tgsi_file_type file,boolean array)1003 translate_register_file(enum tgsi_file_type file, boolean array)
1004 {
1005    switch (file) {
1006    case TGSI_FILE_CONSTANT:
1007       return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1008    case TGSI_FILE_INPUT:
1009       return VGPU10_OPERAND_TYPE_INPUT;
1010    case TGSI_FILE_OUTPUT:
1011       return VGPU10_OPERAND_TYPE_OUTPUT;
1012    case TGSI_FILE_TEMPORARY:
1013       return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1014                    : VGPU10_OPERAND_TYPE_TEMP;
1015    case TGSI_FILE_IMMEDIATE:
1016       /* all immediates are 32-bit values at this time so
1017        * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
1018        */
1019       return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1020    case TGSI_FILE_SAMPLER:
1021       return VGPU10_OPERAND_TYPE_SAMPLER;
1022    case TGSI_FILE_SYSTEM_VALUE:
1023       return VGPU10_OPERAND_TYPE_INPUT;
1024 
1025    /* XXX TODO more cases to finish */
1026 
1027    default:
1028       assert(!"Bad tgsi register file!");
1029       return VGPU10_OPERAND_TYPE_NULL;
1030    }
1031 }
1032 
1033 
1034 /**
1035  * Emit a null dst register
1036  */
1037 static void
emit_null_dst_register(struct svga_shader_emitter_v10 * emit)1038 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1039 {
1040    VGPU10OperandToken0 operand;
1041 
1042    operand.value = 0;
1043    operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1044    operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1045 
1046    emit_dword(emit, operand.value);
1047 }
1048 
1049 
1050 /**
1051  * If the given register is a temporary, return the array ID.
1052  * Else return zero.
1053  */
1054 static unsigned
get_temp_array_id(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1055 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1056                   enum tgsi_file_type file, unsigned index)
1057 {
1058    if (file == TGSI_FILE_TEMPORARY) {
1059       return emit->temp_map[index].arrayId;
1060    }
1061    else {
1062       return 0;
1063    }
1064 }
1065 
1066 
1067 /**
1068  * If the given register is a temporary, convert the index from a TGSI
1069  * TEMPORARY index to a VGPU10 temp index.
1070  */
1071 static unsigned
remap_temp_index(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1072 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1073                  enum tgsi_file_type file, unsigned index)
1074 {
1075    if (file == TGSI_FILE_TEMPORARY) {
1076       return emit->temp_map[index].index;
1077    }
1078    else {
1079       return index;
1080    }
1081 }
1082 
1083 
1084 /**
1085  * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1086  * Note: the operandType field must already be initialized.
1087  * \param file  the register file being accessed
1088  * \param indirect  using indirect addressing of the register file?
1089  * \param index2D  if true, 2-D indexing is being used (const or temp registers)
1090  * \param indirect2D  if true, 2-D indirect indexing being used (for const buf)
1091  */
1092 static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 * emit,VGPU10OperandToken0 operand0,enum tgsi_file_type file,boolean indirect,boolean index2D,bool indirect2D)1093 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1094                         VGPU10OperandToken0 operand0,
1095                         enum tgsi_file_type file,
1096                         boolean indirect,
1097                         boolean index2D, bool indirect2D)
1098 {
1099    VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1100    VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1101 
1102    /*
1103     * Compute index dimensions
1104     */
1105    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1106        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1107        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1108        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1109        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1110        operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1111       /* there's no swizzle for in-line immediates */
1112       indexDim = VGPU10_OPERAND_INDEX_0D;
1113       assert(operand0.selectionMode == 0);
1114    }
1115    else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1116       indexDim = VGPU10_OPERAND_INDEX_0D;
1117    }
1118    else {
1119       indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1120    }
1121 
1122    /*
1123     * Compute index representation(s) (immediate vs relative).
1124     */
1125    if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1126       index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1127          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1128 
1129       index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1130          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1131    }
1132    else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1133       index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1134          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1135 
1136       index1Rep = 0;
1137    }
1138    else {
1139       index0Rep = 0;
1140       index1Rep = 0;
1141    }
1142 
1143    operand0.indexDimension = indexDim;
1144    operand0.index0Representation = index0Rep;
1145    operand0.index1Representation = index1Rep;
1146 
1147    return operand0;
1148 }
1149 
1150 
1151 /**
1152  * Emit the operand for expressing an address register for indirect indexing.
1153  * Note that the address register is really just a temp register.
1154  * \param addr_reg_index  which address register to use
1155  */
1156 static void
emit_indirect_register(struct svga_shader_emitter_v10 * emit,unsigned addr_reg_index)1157 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1158                        unsigned addr_reg_index)
1159 {
1160    unsigned tmp_reg_index;
1161    VGPU10OperandToken0 operand0;
1162 
1163    assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1164 
1165    tmp_reg_index = emit->address_reg_index[addr_reg_index];
1166 
1167    /* operand0 is a simple temporary register, selecting one component */
1168    operand0.value = 0;
1169    operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1170    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1171    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1172    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1173    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1174    operand0.swizzleX = 0;
1175    operand0.swizzleY = 1;
1176    operand0.swizzleZ = 2;
1177    operand0.swizzleW = 3;
1178 
1179    emit_dword(emit, operand0.value);
1180    emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1181 }
1182 
1183 
/**
 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
 *
 * Before the operand is encoded, an OUTPUT destination may be redirected
 * depending on the shader stage (emit->unit):
 *  - VS/GS/TES: position, clip distance, clip vertex and viewport index
 *    outputs are replaced by temporaries when the corresponding temp index
 *    is valid; color outputs set the instruction's saturate bit when
 *    key.clamp_vertex_color is set.
 *  - FS: depth and sample-mask outputs are emitted as dedicated 0-D,
 *    1-component operands and the function returns early; color output 0
 *    may be replaced by a temporary; other color outputs use the semantic
 *    index as register index and bump emit->num_output_writes.
 *  - TCS: tess-level, per-patch and control-point outputs are either
 *    redirected to temporaries, flagged for re-emission
 *    (emit->reemit_instruction) or flagged to be discarded
 *    (emit->discard_instruction), depending on the current phase.
 *
 * A TEMPORARY destination is also marked as initialized in emit->temp_map.
 *
 * In the general case the emitted tokens are: operand0, the temp array ID
 * (only when non-zero), the remapped register index, and the
 * address-register operand (only for indirect destinations).
 *
 * \param emit  the emitter context
 * \param reg  the TGSI dst register to translate
 */
static void
emit_dst_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_dst_register *reg)
{
   enum tgsi_file_type file = reg->Register.File;
   unsigned index = reg->Register.Index;
   /* NOTE(review): the semantic name/index are looked up with the raw
    * register index before the register file is known to be OUTPUT.  The
    * values are only consulted in the OUTPUT branches below, but the array
    * reads themselves happen for every file -- confirm that index is always
    * within the bounds of these arrays (e.g. for large TEMPORARY indexes).
    */
   const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
   const unsigned sem_index = emit->info.output_semantic_index[index];
   unsigned writemask = reg->Register.WriteMask;
   const boolean indirect = reg->Register.Indirect;
   /* Computed from the original file/index; only the TCS per-patch re-emit
    * path below recomputes it after the register has been substituted.
    */
   unsigned tempArrayId = get_temp_array_id(emit, file, index);
   boolean index2d = reg->Register.Dimension || tempArrayId > 0;
   VGPU10OperandToken0 operand0;

   /* Record that this temporary has been written (see
    * need_temp_reg_initialization()).
    */
   if (file == TGSI_FILE_TEMPORARY) {
      emit->temp_map[index].initialized = TRUE;
   }

   if (file == TGSI_FILE_OUTPUT) {
      if (emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY ||
          emit->unit == PIPE_SHADER_TESS_EVAL) {
         if (index == emit->vposition.out_index &&
             emit->vposition.tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
             * vertex position result in a temporary so that we can modify
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->vposition.tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                  emit->clip_dist_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
             * We store the clip distance in a temporary first, then
             * we'll copy it to the shadow copy and to CLIPDIST with the
             * enabled planes mask in emit_clip_distance_instructions().
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_dist_tmp_index + sem_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
            /* replace the CLIPVERTEX output register with a temporary */
            assert(emit->clip_mode == CLIP_VERTEX);
            assert(sem_index == 0);
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_vertex_tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_COLOR &&
                  emit->key.clamp_vertex_color) {

            /* set the saturate modifier of the instruction
             * to clamp the vertex color.  The opcode token being patched
             * is the one at emit->inst_start_token in the token buffer.
             */
            VGPU10OpcodeToken0 *token =
               (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
            token->saturate = TRUE;
         }
         else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
                  emit->gs.viewport_index_out_index != INVALID_INDEX) {
            /* replace OUTPUT[VIEWPORT_INDEX] with a temporary */
            file = TGSI_FILE_TEMPORARY;
            index = emit->gs.viewport_index_tmp_index;
         }
      }
      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (sem_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output register: emitted as a single 0-D,
             * 1-component operand token; nothing else is emitted.
             */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
            /* Fragment sample mask output: emitted as a single 0-D,
             * 1-component operand token; nothing else is emitted.
             */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (index == emit->fs.color_out_index[0] &&
             emit->fs.color_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
             * fragment color result in a temporary so that we can read
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.color_tmp_index;
         }
         else {
            /* Typically, for fragment shaders, the output register index
             * matches the color semantic index.  But not when we write to
             * the fragment depth register.  In that case, OUT[0] will be
             * fragdepth and OUT[1] will be the 0th color output.  We need
             * to use the semantic index for color outputs.
             */
            assert(sem_name == TGSI_SEMANTIC_COLOR);
            index = emit->info.output_semantic_index[index];

            emit->num_output_writes++;
         }
      }
      else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
         if (index == emit->tcs.inner.tgsi_index) {
            /* replace OUTPUT[TESSLEVEL] with a temp.  We store it in a
             * temporary for now so that it can be stored into the
             * appropriate registers in post_helper() in the patch constant
             * phase.
             */
            if (emit->tcs.control_point_phase) {
               /* Discard writing into tessfactor in control point phase */
               emit->discard_instruction =  TRUE;
            }
            else {
               file = TGSI_FILE_TEMPORARY;
               index = emit->tcs.inner.temp_index;
            }
         }
         else if (index == emit->tcs.outer.tgsi_index) {
            /* replace OUTPUT[TESSLEVEL] with a temp.  We store it in a
             * temporary for now so that it can be stored into the
             * appropriate registers in post_helper().
             */
            if (emit->tcs.control_point_phase) {
               /* Discard writing into tessfactor in control point phase */
               emit->discard_instruction =  TRUE;
            }
            else {
               file = TGSI_FILE_TEMPORARY;
               index = emit->tcs.outer.temp_index;
            }
         }
         else if (index >= emit->tcs.patch_generic_out_index &&
                  index < (emit->tcs.patch_generic_out_index +
                          emit->tcs.patch_generic_out_count)) {
            /* generic per-patch (patch constant) output */
            if (emit->tcs.control_point_phase) {
               /* Discard writing into generic patch constant outputs in
                  control point phase */
               emit->discard_instruction =  TRUE;
            }
            else {
               if (emit->reemit_instruction) {
                  /* Store results of reemitted instruction in temporary register. */
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->tcs.patch_generic_tmp_index +
                          (index - emit->tcs.patch_generic_out_index);
                  /**
                   * Temporaries for patch constant data can be done
                   * as indexable temporaries, so re-query the array ID
                   * for the substituted temp register.
                   */
                  tempArrayId = get_temp_array_id(emit, file, index);
                  index2d = tempArrayId > 0;

                  /* this was the second pass; clear the request */
                  emit->reemit_instruction = FALSE;
               }
               else {
                  /* If per-patch outputs are read in the shader, we
                   * reemit the instruction and store the results in
                   * temporaries in the patch constant phase. */
                  if (emit->info.reads_perpatch_outputs) {
                     emit->reemit_instruction = TRUE;
                  }
               }
            }
         }
         else if (reg->Register.Dimension) {
            /* Only control point outputs are declared 2D in tgsi */
            if (emit->tcs.control_point_phase) {
               if (emit->reemit_instruction) {
                  /* Store results of reemitted instruction in temporary register. */
                  index2d = FALSE;
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->tcs.control_point_tmp_index +
                          (index - emit->tcs.control_point_out_index);
                  emit->reemit_instruction = FALSE;
               }
               else {
                  /* The mapped control point outputs are 1-D */
                  index2d = FALSE;
                  if (emit->info.reads_pervertex_outputs) {
                     /* If per-vertex outputs are read in the shader, we
                      * reemit the instruction and store the results in
                      * temporaries in the control point phase. */
                     emit->reemit_instruction = TRUE;
                  }
               }

               /* The clip distance / clip vertex substitutions below are
                * applied after (and override) the file/index chosen above.
                */
               if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                   emit->clip_dist_tmp_index != INVALID_INDEX) {
                  /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
                   * We store the clip distance in a temporary first, then
                   * we'll copy it to the shadow copy and to CLIPDIST with the
                   * enabled planes mask in emit_clip_distance_instructions().
                   */
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->clip_dist_tmp_index + sem_index;
               }
               else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                        emit->clip_vertex_tmp_index != INVALID_INDEX) {
                  /* replace the CLIPVERTEX output register with a temporary */
                  assert(emit->clip_mode == CLIP_VERTEX);
                  assert(sem_index == 0);
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->clip_vertex_tmp_index;
               }
            }
            else {
               /* Discard writing into control point outputs in
                  patch constant phase */
               emit->discard_instruction =  TRUE;
            }
         }
      }
   }

   /* init operand tokens to all zero */
   operand0.value = 0;

   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* the operand has a writemask */
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;

   /* Which of the four dest components to write to. Note that we can use a
    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
    */
   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
   operand0.mask = writemask;

   /* translate TGSI register file type to VGPU10 operand type */
   operand0.operandType = translate_register_file(file, tempArrayId > 0);

   check_register_index(emit, operand0.operandType, index);

   /* dst registers never use 2-D indirect indexing, hence the FALSE */
   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, FALSE);

   /* Emit tokens: operand0, [array ID], register index, [address reg] */
   emit_dword(emit, operand0.value);
   if (tempArrayId > 0) {
      emit_dword(emit, tempArrayId);
   }

   emit_dword(emit, remap_temp_index(emit, file, index));

   if (indirect) {
      emit_indirect_register(emit, reg->Indirect.Index);
   }
}
1441 
1442 
1443 /**
1444  * Check if temporary register needs to be initialize when
1445  * shader is not using indirect addressing for temporary and uninitialized
1446  * temporary is not used in loop. In these two scenarios, we cannot
1447  * determine if temporary is initialized or not.
1448  */
1449 static boolean
need_temp_reg_initialization(struct svga_shader_emitter_v10 * emit,unsigned index)1450 need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1451                              unsigned index)
1452 {
1453    if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
1454        && emit->current_loop_depth == 0) {
1455       if (!emit->temp_map[index].initialized &&
1456           emit->temp_map[index].index < emit->num_shader_temps) {
1457          return TRUE;
1458       }
1459    }
1460 
1461    return FALSE;
1462 }
1463 
1464 
1465 /**
1466  * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1467  * In quite a few cases, we do register substitution.  For example, if
1468  * the TGSI register is the front/back-face register, we replace that with
1469  * a temp register containing a value we computed earlier.
1470  */
1471 static void
emit_src_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * reg)1472 emit_src_register(struct svga_shader_emitter_v10 *emit,
1473                   const struct tgsi_full_src_register *reg)
1474 {
1475    enum tgsi_file_type file = reg->Register.File;
1476    unsigned index = reg->Register.Index;
1477    const boolean indirect = reg->Register.Indirect;
1478    unsigned tempArrayId = get_temp_array_id(emit, file, index);
1479    boolean index2d = (reg->Register.Dimension ||
1480                             tempArrayId > 0 ||
1481                             file == TGSI_FILE_CONSTANT);
1482    unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1483    boolean indirect2d = reg->Dimension.Indirect;
1484    unsigned swizzleX = reg->Register.SwizzleX;
1485    unsigned swizzleY = reg->Register.SwizzleY;
1486    unsigned swizzleZ = reg->Register.SwizzleZ;
1487    unsigned swizzleW = reg->Register.SwizzleW;
1488    const boolean absolute = reg->Register.Absolute;
1489    const boolean negate = reg->Register.Negate;
1490    VGPU10OperandToken0 operand0;
1491    VGPU10OperandToken1 operand1;
1492 
1493    operand0.value = operand1.value = 0;
1494 
1495    if (emit->unit == PIPE_SHADER_FRAGMENT){
1496       if (file == TGSI_FILE_INPUT) {
1497          if (index == emit->fs.face_input_index) {
1498             /* Replace INPUT[FACE] with TEMP[FACE] */
1499             file = TGSI_FILE_TEMPORARY;
1500             index = emit->fs.face_tmp_index;
1501          }
1502          else if (index == emit->fs.fragcoord_input_index) {
1503             /* Replace INPUT[POSITION] with TEMP[POSITION] */
1504             file = TGSI_FILE_TEMPORARY;
1505             index = emit->fs.fragcoord_tmp_index;
1506          }
1507          else if (index == emit->fs.layer_input_index) {
1508             /* Replace INPUT[LAYER] with zero.x */
1509             file = TGSI_FILE_IMMEDIATE;
1510             index = emit->fs.layer_imm_index;
1511             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1512          }
1513          else {
1514             /* We remap fragment shader inputs to that FS input indexes
1515              * match up with VS/GS output indexes.
1516              */
1517             index = emit->linkage.input_map[index];
1518          }
1519       }
1520       else if (file == TGSI_FILE_SYSTEM_VALUE) {
1521          if (index == emit->fs.sample_pos_sys_index) {
1522             assert(emit->version >= 41);
1523             /* Current sample position is in a temp register */
1524             file = TGSI_FILE_TEMPORARY;
1525             index = emit->fs.sample_pos_tmp_index;
1526          }
1527          else if (index == emit->fs.sample_mask_in_sys_index) {
1528             /* Emitted as vCoverage0.x */
1529             /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1530              * elements where s is the maximum number of color samples supported
1531              * by the implementation. With current implementation, we should not
1532              * have more than one element. So assert if Index != 0
1533              */
1534             assert((!reg->Register.Indirect && reg->Register.Index == 0) ||
1535                    reg->Register.Indirect);
1536             operand0.value = 0;
1537             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1538             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1539             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1540             operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1541             emit_dword(emit, operand0.value);
1542             return;
1543          }
1544          else {
1545             /* Map the TGSI system value to a VGPU10 input register */
1546             assert(index < ARRAY_SIZE(emit->system_value_indexes));
1547             file = TGSI_FILE_INPUT;
1548             index = emit->system_value_indexes[index];
1549          }
1550       }
1551    }
1552    else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1553       if (file == TGSI_FILE_INPUT) {
1554          if (index == emit->gs.prim_id_index) {
1555             operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1556             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1557          }
1558          index = emit->linkage.input_map[index];
1559       }
1560       else if (file == TGSI_FILE_SYSTEM_VALUE &&
1561                index == emit->gs.invocation_id_sys_index) {
1562          /* Emitted as vGSInstanceID0.x */
1563          operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1564          operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1565          index = 0;
1566       }
1567    }
1568    else if (emit->unit == PIPE_SHADER_VERTEX) {
1569       if (file == TGSI_FILE_INPUT) {
1570          /* if input is adjusted... */
1571          if ((emit->key.vs.adjust_attrib_w_1 |
1572               emit->key.vs.adjust_attrib_itof |
1573               emit->key.vs.adjust_attrib_utof |
1574               emit->key.vs.attrib_is_bgra |
1575               emit->key.vs.attrib_puint_to_snorm |
1576               emit->key.vs.attrib_puint_to_uscaled |
1577               emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1578             file = TGSI_FILE_TEMPORARY;
1579             index = emit->vs.adjusted_input[index];
1580          }
1581       }
1582       else if (file == TGSI_FILE_SYSTEM_VALUE) {
1583          if (index == emit->vs.vertex_id_sys_index &&
1584              emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
1585             file = TGSI_FILE_TEMPORARY;
1586             index = emit->vs.vertex_id_tmp_index;
1587             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1588          }
1589          else {
1590             /* Map the TGSI system value to a VGPU10 input register */
1591             assert(index < ARRAY_SIZE(emit->system_value_indexes));
1592             file = TGSI_FILE_INPUT;
1593             index = emit->system_value_indexes[index];
1594          }
1595       }
1596    }
1597    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1598 
1599       if (file == TGSI_FILE_SYSTEM_VALUE) {
1600          if (index == emit->tcs.vertices_per_patch_index) {
1601             /**
1602              * if source register is the system value for vertices_per_patch,
1603              * replace it with the immediate.
1604              */
1605             file = TGSI_FILE_IMMEDIATE;
1606             index = emit->tcs.imm_index;
1607             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1608          }
1609          else if (index == emit->tcs.invocation_id_sys_index) {
1610             if (emit->tcs.control_point_phase) {
1611                /**
1612                 * Emitted as vOutputControlPointID.x
1613                 */
1614                operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1615                operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1616                operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1617                operand0.mask = 0;
1618                emit_dword(emit, operand0.value);
1619                return;
1620             }
1621             else {
1622                /* There is no control point ID input declaration in
1623                 * the patch constant phase in hull shader.
1624                 * Since for now we are emitting all instructions in
1625                 * the patch constant phase, we are replacing the
1626                 * control point ID reference with the immediate 0.
1627                 */
1628                file = TGSI_FILE_IMMEDIATE;
1629                index = emit->tcs.imm_index;
1630                swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1631             }
1632          }
1633          else if (index == emit->tcs.prim_id_index) {
1634             /**
1635              * Emitted as vPrim.x
1636              */
1637             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1638             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1639             index = 0;
1640          }
1641       }
1642       else if (file == TGSI_FILE_INPUT) {
1643          index = emit->linkage.input_map[index];
1644          if (!emit->tcs.control_point_phase) {
1645             /* Emitted as vicp */
1646             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1647             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1648             assert(reg->Register.Dimension);
1649          }
1650       }
1651       else if (file == TGSI_FILE_OUTPUT) {
1652          if ((index >= emit->tcs.patch_generic_out_index &&
1653              index < (emit->tcs.patch_generic_out_index +
1654                       emit->tcs.patch_generic_out_count)) ||
1655              index == emit->tcs.inner.tgsi_index ||
1656              index == emit->tcs.outer.tgsi_index) {
1657             if (emit->tcs.control_point_phase) {
1658                emit->discard_instruction = TRUE;
1659             }
1660             else {
1661                /* Device doesn't allow reading from output so
1662                 * use corresponding temporary register as source */
1663                file = TGSI_FILE_TEMPORARY;
1664                if (index == emit->tcs.inner.tgsi_index) {
1665                   index = emit->tcs.inner.temp_index;
1666                }
1667                else if (index == emit->tcs.outer.tgsi_index) {
1668                   index = emit->tcs.outer.temp_index;
1669                }
1670                else {
1671                   index = emit->tcs.patch_generic_tmp_index +
1672                           (index - emit->tcs.patch_generic_out_index);
1673                }
1674 
1675                /**
1676                 * Temporaries for patch constant data can be done
1677                 * as indexable temporaries.
1678                 */
1679                tempArrayId = get_temp_array_id(emit, file, index);
1680                index2d = tempArrayId > 0;
1681                index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1682             }
1683          }
1684          else if (index2d) {
1685             if (emit->tcs.control_point_phase) {
1686                /* Device doesn't allow reading from output so
1687                 * use corresponding temporary register as source */
1688                file = TGSI_FILE_TEMPORARY;
1689                index2d = FALSE;
1690                index = emit->tcs.control_point_tmp_index +
1691                        (index - emit->tcs.control_point_out_index);
1692             }
1693             else {
1694                emit->discard_instruction = TRUE;
1695             }
1696          }
1697       }
1698    }
1699    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1700       if (file == TGSI_FILE_SYSTEM_VALUE) {
1701          if (index == emit->tes.tesscoord_sys_index) {
1702             /**
1703              * Emitted as vDomain
1704              */
1705             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1706             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1707             index = 0;
1708          }
1709          else if (index == emit->tes.inner.tgsi_index) {
1710             file = TGSI_FILE_TEMPORARY;
1711             index = emit->tes.inner.temp_index;
1712          }
1713          else if (index == emit->tes.outer.tgsi_index) {
1714             file = TGSI_FILE_TEMPORARY;
1715             index = emit->tes.outer.temp_index;
1716          }
1717          else if (index == emit->tes.prim_id_index) {
1718             /**
1719              * Emitted as vPrim.x
1720              */
1721             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1722             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1723             index = 0;
1724          }
1725 
1726       }
1727       else if (file == TGSI_FILE_INPUT) {
1728          if (index2d) {
1729             /* 2D input is emitted as vcp (input control point). */
1730             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1731             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1732 
1733             /* index specifies the element index and is remapped
1734              * to align with the tcs output index.
1735              */
1736             index = emit->linkage.input_map[index];
1737 
1738             assert(index2 < emit->key.tes.vertices_per_patch);
1739          }
1740          else {
1741             if (index < emit->key.tes.tessfactor_index)
1742                /* index specifies the generic patch index.
1743                 * Remapped to match up with the tcs output index.
1744                 */
1745                index = emit->linkage.input_map[index];
1746 
1747             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1748             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1749          }
1750       }
1751    }
1752 
1753    if (file == TGSI_FILE_ADDRESS) {
1754       index = emit->address_reg_index[index];
1755       file = TGSI_FILE_TEMPORARY;
1756    }
1757 
1758    if (file == TGSI_FILE_TEMPORARY) {
1759       if (need_temp_reg_initialization(emit, index)) {
1760          emit->initialize_temp_index = index;
1761          emit->discard_instruction = TRUE;
1762       }
1763    }
1764 
1765    if (operand0.value == 0) {
1766       /* if operand0 was not set above for a special case, do the general
1767        * case now.
1768        */
1769       operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1770       operand0.operandType = translate_register_file(file, tempArrayId > 0);
1771    }
1772    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1773                                       index2d, indirect2d);
1774 
1775    if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1776        operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1777       /* there's no swizzle for in-line immediates */
1778       if (swizzleX == swizzleY &&
1779           swizzleX == swizzleZ &&
1780           swizzleX == swizzleW) {
1781          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1782       }
1783       else {
1784          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1785       }
1786 
1787       operand0.swizzleX = swizzleX;
1788       operand0.swizzleY = swizzleY;
1789       operand0.swizzleZ = swizzleZ;
1790       operand0.swizzleW = swizzleW;
1791 
1792       if (absolute || negate) {
1793          operand0.extended = 1;
1794          operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1795          if (absolute && !negate)
1796             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1797          if (!absolute && negate)
1798             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1799          if (absolute && negate)
1800             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1801       }
1802    }
1803 
1804    /* Emit the operand tokens */
1805    emit_dword(emit, operand0.value);
1806    if (operand0.extended)
1807       emit_dword(emit, operand1.value);
1808 
1809    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1810       /* Emit the four float/int in-line immediate values */
1811       unsigned *c;
1812       assert(index < ARRAY_SIZE(emit->immediates));
1813       assert(file == TGSI_FILE_IMMEDIATE);
1814       assert(swizzleX < 4);
1815       assert(swizzleY < 4);
1816       assert(swizzleZ < 4);
1817       assert(swizzleW < 4);
1818       c = (unsigned *) emit->immediates[index];
1819       emit_dword(emit, c[swizzleX]);
1820       emit_dword(emit, c[swizzleY]);
1821       emit_dword(emit, c[swizzleZ]);
1822       emit_dword(emit, c[swizzleW]);
1823    }
1824    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1825       /* Emit the register index(es) */
1826       if (index2d) {
1827          emit_dword(emit, index2);
1828 
1829          if (indirect2d) {
1830             emit_indirect_register(emit, reg->DimIndirect.Index);
1831          }
1832       }
1833 
1834       emit_dword(emit, remap_temp_index(emit, file, index));
1835 
1836       if (indirect) {
1837          emit_indirect_register(emit, reg->Indirect.Index);
1838       }
1839    }
1840 }
1841 
1842 
1843 /**
1844  * Emit a resource operand (for use with a SAMPLE instruction).
1845  */
1846 static void
emit_resource_register(struct svga_shader_emitter_v10 * emit,unsigned resource_number)1847 emit_resource_register(struct svga_shader_emitter_v10 *emit,
1848                        unsigned resource_number)
1849 {
1850    VGPU10OperandToken0 operand0;
1851 
1852    check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
1853 
1854    /* init */
1855    operand0.value = 0;
1856 
1857    operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
1858    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1859    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1860    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1861    operand0.swizzleX = VGPU10_COMPONENT_X;
1862    operand0.swizzleY = VGPU10_COMPONENT_Y;
1863    operand0.swizzleZ = VGPU10_COMPONENT_Z;
1864    operand0.swizzleW = VGPU10_COMPONENT_W;
1865 
1866    emit_dword(emit, operand0.value);
1867    emit_dword(emit, resource_number);
1868 }
1869 
1870 
1871 /**
1872  * Emit a sampler operand (for use with a SAMPLE instruction).
1873  */
1874 static void
emit_sampler_register(struct svga_shader_emitter_v10 * emit,unsigned sampler_number)1875 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
1876                       unsigned sampler_number)
1877 {
1878    VGPU10OperandToken0 operand0;
1879 
1880    check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
1881 
1882    /* init */
1883    operand0.value = 0;
1884 
1885    operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
1886    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1887 
1888    emit_dword(emit, operand0.value);
1889    emit_dword(emit, sampler_number);
1890 }
1891 
1892 
1893 /**
1894  * Emit an operand which reads the IS_FRONT_FACING register.
1895  */
1896 static void
emit_face_register(struct svga_shader_emitter_v10 * emit)1897 emit_face_register(struct svga_shader_emitter_v10 *emit)
1898 {
1899    VGPU10OperandToken0 operand0;
1900    unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
1901 
1902    /* init */
1903    operand0.value = 0;
1904 
1905    operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
1906    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1907    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1908    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1909 
1910    operand0.swizzleX = VGPU10_COMPONENT_X;
1911    operand0.swizzleY = VGPU10_COMPONENT_X;
1912    operand0.swizzleZ = VGPU10_COMPONENT_X;
1913    operand0.swizzleW = VGPU10_COMPONENT_X;
1914 
1915    emit_dword(emit, operand0.value);
1916    emit_dword(emit, index);
1917 }
1918 
1919 
1920 /**
1921  * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
1922  * instruction.
1923  */
1924 static void
emit_rasterizer_register(struct svga_shader_emitter_v10 * emit)1925 emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
1926 {
1927    VGPU10OperandToken0 operand0;
1928 
1929    /* init */
1930    operand0.value = 0;
1931 
1932    /* No register index for rasterizer index (there's only one) */
1933    operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
1934    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1935    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1936    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1937    operand0.swizzleX = VGPU10_COMPONENT_X;
1938    operand0.swizzleY = VGPU10_COMPONENT_Y;
1939    operand0.swizzleZ = VGPU10_COMPONENT_Z;
1940    operand0.swizzleW = VGPU10_COMPONENT_W;
1941 
1942    emit_dword(emit, operand0.value);
1943 }
1944 
1945 
1946 /**
1947  * Emit tokens for the "stream" register used by the
1948  * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
1949  */
1950 static void
emit_stream_register(struct svga_shader_emitter_v10 * emit,unsigned index)1951 emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
1952 {
1953    VGPU10OperandToken0 operand0;
1954 
1955    /* init */
1956    operand0.value = 0;
1957 
1958    /* No register index for rasterizer index (there's only one) */
1959    operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
1960    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1961    operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1962 
1963    emit_dword(emit, operand0.value);
1964    emit_dword(emit, index);
1965 }
1966 
1967 
1968 /**
1969  * Emit the token for a VGPU10 opcode, with precise parameter.
1970  * \param saturate   clamp result to [0,1]?
1971  */
1972 static void
emit_opcode_precise(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,boolean saturate,boolean precise)1973 emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
1974                     unsigned vgpu10_opcode, boolean saturate, boolean precise)
1975 {
1976    VGPU10OpcodeToken0 token0;
1977 
1978    token0.value = 0;  /* init all fields to zero */
1979    token0.opcodeType = vgpu10_opcode;
1980    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1981    token0.saturate = saturate;
1982 
1983    /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
1984     * 'invariant' declarations.  Only set preciseValues=1 if we have SM5.
1985     */
1986    token0.preciseValues = precise && emit->version >= 50;
1987 
1988    emit_dword(emit, token0.value);
1989 
1990    emit->uses_precise_qualifier |= token0.preciseValues;
1991 }
1992 
1993 
1994 /**
1995  * Emit the token for a VGPU10 opcode.
1996  * \param saturate   clamp result to [0,1]?
1997  */
1998 static void
emit_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,boolean saturate)1999 emit_opcode(struct svga_shader_emitter_v10 *emit,
2000             unsigned vgpu10_opcode, boolean saturate)
2001 {
2002    emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE);
2003 }
2004 
2005 
2006 /**
2007  * Emit the token for a VGPU10 resinfo instruction.
2008  * \param modifier   return type modifier, _uint or _rcpFloat.
2009  *                   TODO: We may want to remove this parameter if it will
2010  *                   only ever be used as _uint.
2011  */
2012 static void
emit_opcode_resinfo(struct svga_shader_emitter_v10 * emit,VGPU10_RESINFO_RETURN_TYPE modifier)2013 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
2014                     VGPU10_RESINFO_RETURN_TYPE modifier)
2015 {
2016    VGPU10OpcodeToken0 token0;
2017 
2018    token0.value = 0;  /* init all fields to zero */
2019    token0.opcodeType = VGPU10_OPCODE_RESINFO;
2020    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2021    token0.resinfoReturnType = modifier;
2022 
2023    emit_dword(emit, token0.value);
2024 }
2025 
2026 
2027 /**
2028  * Emit opcode tokens for a texture sample instruction.  Texture instructions
2029  * can be rather complicated (texel offsets, etc) so we have this specialized
2030  * function.
2031  */
2032 static void
emit_sample_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,boolean saturate,const int offsets[3])2033 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2034                    unsigned vgpu10_opcode, boolean saturate,
2035                    const int offsets[3])
2036 {
2037    VGPU10OpcodeToken0 token0;
2038    VGPU10OpcodeToken1 token1;
2039 
2040    token0.value = 0;  /* init all fields to zero */
2041    token0.opcodeType = vgpu10_opcode;
2042    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2043    token0.saturate = saturate;
2044 
2045    if (offsets[0] || offsets[1] || offsets[2]) {
2046       assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2047       assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2048       assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2049       assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2050       assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2051       assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2052 
2053       token0.extended = 1;
2054       token1.value = 0;
2055       token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2056       token1.offsetU = offsets[0];
2057       token1.offsetV = offsets[1];
2058       token1.offsetW = offsets[2];
2059    }
2060 
2061    emit_dword(emit, token0.value);
2062    if (token0.extended) {
2063       emit_dword(emit, token1.value);
2064    }
2065 }
2066 
2067 
2068 /**
2069  * Emit a DISCARD opcode token.
2070  * If nonzero is set, we'll discard the fragment if the X component is not 0.
2071  * Otherwise, we'll discard the fragment if the X component is 0.
2072  */
2073 static void
emit_discard_opcode(struct svga_shader_emitter_v10 * emit,boolean nonzero)2074 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
2075 {
2076    VGPU10OpcodeToken0 opcode0;
2077 
2078    opcode0.value = 0;
2079    opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2080    if (nonzero)
2081       opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2082 
2083    emit_dword(emit, opcode0.value);
2084 }
2085 
2086 
2087 /**
2088  * We need to call this before we begin emitting a VGPU10 instruction.
2089  */
2090 static void
begin_emit_instruction(struct svga_shader_emitter_v10 * emit)2091 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
2092 {
2093    assert(emit->inst_start_token == 0);
2094    /* Save location of the instruction's VGPU10OpcodeToken0 token.
2095     * Note, we can't save a pointer because it would become invalid if
2096     * we have to realloc the output buffer.
2097     */
2098    emit->inst_start_token = emit_get_num_tokens(emit);
2099 }
2100 
2101 
2102 /**
2103  * We need to call this after we emit the last token of a VGPU10 instruction.
2104  * This function patches in the opcode token's instructionLength field.
2105  */
2106 static void
end_emit_instruction(struct svga_shader_emitter_v10 * emit)2107 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
2108 {
2109    VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
2110    unsigned inst_length;
2111 
2112    assert(emit->inst_start_token > 0);
2113 
2114    if (emit->discard_instruction) {
2115       /* Back up the emit->ptr to where this instruction started so
2116        * that we discard the current instruction.
2117        */
2118       emit->ptr = (char *) (tokens + emit->inst_start_token);
2119    }
2120    else {
2121       /* Compute instruction length and patch that into the start of
2122        * the instruction.
2123        */
2124       inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
2125 
2126       assert(inst_length > 0);
2127 
2128       tokens[emit->inst_start_token].instructionLength = inst_length;
2129    }
2130 
2131    emit->inst_start_token = 0; /* reset to zero for error checking */
2132    emit->discard_instruction = FALSE;
2133 }
2134 
2135 
2136 /**
2137  * Return index for a free temporary register.
2138  */
2139 static unsigned
get_temp_index(struct svga_shader_emitter_v10 * emit)2140 get_temp_index(struct svga_shader_emitter_v10 *emit)
2141 {
2142    assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
2143    return emit->num_shader_temps + emit->internal_temp_count++;
2144 }
2145 
2146 
2147 /**
2148  * Release the temporaries which were generated by get_temp_index().
2149  */
2150 static void
free_temp_indexes(struct svga_shader_emitter_v10 * emit)2151 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
2152 {
2153    emit->internal_temp_count = 0;
2154 }
2155 
2156 
2157 /**
2158  * Create a tgsi_full_src_register.
2159  */
2160 static struct tgsi_full_src_register
make_src_reg(enum tgsi_file_type file,unsigned index)2161 make_src_reg(enum tgsi_file_type file, unsigned index)
2162 {
2163    struct tgsi_full_src_register reg;
2164 
2165    memset(&reg, 0, sizeof(reg));
2166    reg.Register.File = file;
2167    reg.Register.Index = index;
2168    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2169    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2170    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2171    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2172    return reg;
2173 }
2174 
2175 
2176 /**
2177  * Create a tgsi_full_src_register with a swizzle such that all four
2178  * vector components have the same scalar value.
2179  */
2180 static struct tgsi_full_src_register
make_src_scalar_reg(enum tgsi_file_type file,unsigned index,unsigned component)2181 make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2182 {
2183    struct tgsi_full_src_register reg;
2184 
2185    assert(component >= TGSI_SWIZZLE_X);
2186    assert(component <= TGSI_SWIZZLE_W);
2187 
2188    memset(&reg, 0, sizeof(reg));
2189    reg.Register.File = file;
2190    reg.Register.Index = index;
2191    reg.Register.SwizzleX =
2192    reg.Register.SwizzleY =
2193    reg.Register.SwizzleZ =
2194    reg.Register.SwizzleW = component;
2195    return reg;
2196 }
2197 
2198 
2199 /**
2200  * Create a tgsi_full_src_register for a temporary.
2201  */
2202 static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)2203 make_src_temp_reg(unsigned index)
2204 {
2205    return make_src_reg(TGSI_FILE_TEMPORARY, index);
2206 }
2207 
2208 
2209 /**
2210  * Create a tgsi_full_src_register for a constant.
2211  */
2212 static struct tgsi_full_src_register
make_src_const_reg(unsigned index)2213 make_src_const_reg(unsigned index)
2214 {
2215    return make_src_reg(TGSI_FILE_CONSTANT, index);
2216 }
2217 
2218 
2219 /**
2220  * Create a tgsi_full_src_register for an immediate constant.
2221  */
2222 static struct tgsi_full_src_register
make_src_immediate_reg(unsigned index)2223 make_src_immediate_reg(unsigned index)
2224 {
2225    return make_src_reg(TGSI_FILE_IMMEDIATE, index);
2226 }
2227 
2228 
2229 /**
2230  * Create a tgsi_full_dst_register.
2231  */
2232 static struct tgsi_full_dst_register
make_dst_reg(enum tgsi_file_type file,unsigned index)2233 make_dst_reg(enum tgsi_file_type file, unsigned index)
2234 {
2235    struct tgsi_full_dst_register reg;
2236 
2237    memset(&reg, 0, sizeof(reg));
2238    reg.Register.File = file;
2239    reg.Register.Index = index;
2240    reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
2241    return reg;
2242 }
2243 
2244 
2245 /**
2246  * Create a tgsi_full_dst_register for a temporary.
2247  */
2248 static struct tgsi_full_dst_register
make_dst_temp_reg(unsigned index)2249 make_dst_temp_reg(unsigned index)
2250 {
2251    return make_dst_reg(TGSI_FILE_TEMPORARY, index);
2252 }
2253 
2254 
2255 /**
2256  * Create a tgsi_full_dst_register for an output.
2257  */
2258 static struct tgsi_full_dst_register
make_dst_output_reg(unsigned index)2259 make_dst_output_reg(unsigned index)
2260 {
2261    return make_dst_reg(TGSI_FILE_OUTPUT, index);
2262 }
2263 
2264 
2265 /**
2266  * Create negated tgsi_full_src_register.
2267  */
2268 static struct tgsi_full_src_register
negate_src(const struct tgsi_full_src_register * reg)2269 negate_src(const struct tgsi_full_src_register *reg)
2270 {
2271    struct tgsi_full_src_register neg = *reg;
2272    neg.Register.Negate = !reg->Register.Negate;
2273    return neg;
2274 }
2275 
2276 /**
2277  * Create absolute value of a tgsi_full_src_register.
2278  */
2279 static struct tgsi_full_src_register
absolute_src(const struct tgsi_full_src_register * reg)2280 absolute_src(const struct tgsi_full_src_register *reg)
2281 {
2282    struct tgsi_full_src_register absolute = *reg;
2283    absolute.Register.Absolute = 1;
2284    return absolute;
2285 }
2286 
2287 
2288 /** Return the named swizzle term from the src register */
2289 static inline unsigned
get_swizzle(const struct tgsi_full_src_register * reg,enum tgsi_swizzle term)2290 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2291 {
2292    switch (term) {
2293    case TGSI_SWIZZLE_X:
2294       return reg->Register.SwizzleX;
2295    case TGSI_SWIZZLE_Y:
2296       return reg->Register.SwizzleY;
2297    case TGSI_SWIZZLE_Z:
2298       return reg->Register.SwizzleZ;
2299    case TGSI_SWIZZLE_W:
2300       return reg->Register.SwizzleW;
2301    default:
2302       assert(!"Bad swizzle");
2303       return TGSI_SWIZZLE_X;
2304    }
2305 }
2306 
2307 
2308 /**
2309  * Create swizzled tgsi_full_src_register.
2310  */
2311 static struct tgsi_full_src_register
swizzle_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzleX,enum tgsi_swizzle swizzleY,enum tgsi_swizzle swizzleZ,enum tgsi_swizzle swizzleW)2312 swizzle_src(const struct tgsi_full_src_register *reg,
2313             enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2314             enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2315 {
2316    struct tgsi_full_src_register swizzled = *reg;
2317    /* Note: we swizzle the current swizzle */
2318    swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2319    swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2320    swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2321    swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2322    return swizzled;
2323 }
2324 
2325 
2326 /**
2327  * Create swizzled tgsi_full_src_register where all the swizzle
2328  * terms are the same.
2329  */
2330 static struct tgsi_full_src_register
scalar_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzle)2331 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2332 {
2333    struct tgsi_full_src_register swizzled = *reg;
2334    /* Note: we swizzle the current swizzle */
2335    swizzled.Register.SwizzleX =
2336    swizzled.Register.SwizzleY =
2337    swizzled.Register.SwizzleZ =
2338    swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2339    return swizzled;
2340 }
2341 
2342 
2343 /**
2344  * Create new tgsi_full_dst_register with writemask.
2345  * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
2346  */
2347 static struct tgsi_full_dst_register
writemask_dst(const struct tgsi_full_dst_register * reg,unsigned mask)2348 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2349 {
2350    struct tgsi_full_dst_register masked = *reg;
2351    masked.Register.WriteMask = mask;
2352    return masked;
2353 }
2354 
2355 
2356 /**
2357  * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2358  */
2359 static boolean
same_swizzle_terms(const struct tgsi_full_src_register * reg)2360 same_swizzle_terms(const struct tgsi_full_src_register *reg)
2361 {
2362    return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2363            reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2364            reg->Register.SwizzleZ == reg->Register.SwizzleW);
2365 }
2366 
2367 
2368 /**
2369  * Search the vector for the value 'x' and return its position.
2370  */
2371 static int
find_imm_in_vec4(const union tgsi_immediate_data vec[4],union tgsi_immediate_data x)2372 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2373                  union tgsi_immediate_data x)
2374 {
2375    unsigned i;
2376    for (i = 0; i < 4; i++) {
2377       if (vec[i].Int == x.Int)
2378          return i;
2379    }
2380    return -1;
2381 }
2382 
2383 
2384 /**
2385  * Helper used by make_immediate_reg(), make_immediate_reg_4().
2386  */
2387 static int
find_immediate(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data x,unsigned startIndex)2388 find_immediate(struct svga_shader_emitter_v10 *emit,
2389                union tgsi_immediate_data x, unsigned startIndex)
2390 {
2391    const unsigned endIndex = emit->num_immediates;
2392    unsigned i;
2393 
2394    assert(emit->immediates_emitted);
2395 
2396    /* Search immediates for x, y, z, w */
2397    for (i = startIndex; i < endIndex; i++) {
2398       if (x.Int == emit->immediates[i][0].Int ||
2399           x.Int == emit->immediates[i][1].Int ||
2400           x.Int == emit->immediates[i][2].Int ||
2401           x.Int == emit->immediates[i][3].Int) {
2402          return i;
2403       }
2404    }
2405    /* Should never try to use an immediate value that wasn't pre-declared */
2406    assert(!"find_immediate() failed!");
2407    return -1;
2408 }
2409 
2410 
2411 /**
2412  * As above, but search for a double[2] pair.
2413  */
2414 static int
find_immediate_dbl(struct svga_shader_emitter_v10 * emit,double x,double y)2415 find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
2416                    double x, double y)
2417 {
2418    const unsigned endIndex = emit->num_immediates;
2419    unsigned i;
2420 
2421    assert(emit->immediates_emitted);
2422 
2423    /* Search immediates for x, y, z, w */
2424    for (i = 0; i < endIndex; i++) {
2425       if (x == emit->immediates_dbl[i][0] &&
2426           y == emit->immediates_dbl[i][1]) {
2427          return i;
2428       }
2429    }
2430    /* Should never try to use an immediate value that wasn't pre-declared */
2431    assert(!"find_immediate_dbl() failed!");
2432    return -1;
2433 }
2434 
2435 
2436 
2437 /**
2438  * Return a tgsi_full_src_register for an immediate/literal
2439  * union tgsi_immediate_data[4] value.
2440  * Note: the values must have been previously declared/allocated in
2441  * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
2442  * vec4 immediate.
2443  */
2444 static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2445 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
2446                      const union tgsi_immediate_data imm[4])
2447 {
2448    struct tgsi_full_src_register reg;
2449    unsigned i;
2450 
2451    for (i = 0; i < emit->num_common_immediates; i++) {
2452       /* search for first component value */
2453       int immpos = find_immediate(emit, imm[0], i);
2454       int x, y, z, w;
2455 
2456       assert(immpos >= 0);
2457 
2458       /* find remaining components within the immediate vector */
2459       x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
2460       y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
2461       z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
2462       w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
2463 
2464       if (x >=0 &&  y >= 0 && z >= 0 && w >= 0) {
2465          /* found them all */
2466          memset(&reg, 0, sizeof(reg));
2467          reg.Register.File = TGSI_FILE_IMMEDIATE;
2468          reg.Register.Index = immpos;
2469          reg.Register.SwizzleX = x;
2470          reg.Register.SwizzleY = y;
2471          reg.Register.SwizzleZ = z;
2472          reg.Register.SwizzleW = w;
2473          return reg;
2474       }
2475       /* else, keep searching */
2476    }
2477 
2478    assert(!"Failed to find immediate register!");
2479 
2480    /* Just return IMM[0].xxxx */
2481    memset(&reg, 0, sizeof(reg));
2482    reg.Register.File = TGSI_FILE_IMMEDIATE;
2483    return reg;
2484 }
2485 
2486 
2487 /**
2488  * Return a tgsi_full_src_register for an immediate/literal
2489  * union tgsi_immediate_data value of the form {value, value, value, value}.
2490  * \sa make_immediate_reg_4() regarding allowed values.
2491  */
2492 static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data value)2493 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2494                    union tgsi_immediate_data value)
2495 {
2496    struct tgsi_full_src_register reg;
2497    int immpos = find_immediate(emit, value, 0);
2498 
2499    assert(immpos >= 0);
2500 
2501    memset(&reg, 0, sizeof(reg));
2502    reg.Register.File = TGSI_FILE_IMMEDIATE;
2503    reg.Register.Index = immpos;
2504    reg.Register.SwizzleX =
2505    reg.Register.SwizzleY =
2506    reg.Register.SwizzleZ =
2507    reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2508 
2509    return reg;
2510 }
2511 
2512 
2513 /**
2514  * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2515  * \sa make_immediate_reg_4() regarding allowed values.
2516  */
2517 static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2518 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2519                           float x, float y, float z, float w)
2520 {
2521    union tgsi_immediate_data imm[4];
2522    imm[0].Float = x;
2523    imm[1].Float = y;
2524    imm[2].Float = z;
2525    imm[3].Float = w;
2526    return make_immediate_reg_4(emit, imm);
2527 }
2528 
2529 
2530 /**
2531  * Return a tgsi_full_src_register for an immediate/literal float value
2532  * of the form {value, value, value, value}.
2533  * \sa make_immediate_reg_4() regarding allowed values.
2534  */
2535 static struct tgsi_full_src_register
make_immediate_reg_float(struct svga_shader_emitter_v10 * emit,float value)2536 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
2537 {
2538    union tgsi_immediate_data imm;
2539    imm.Float = value;
2540    return make_immediate_reg(emit, imm);
2541 }
2542 
2543 
2544 /**
2545  * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2546  */
2547 static struct tgsi_full_src_register
make_immediate_reg_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2548 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2549                         int x, int y, int z, int w)
2550 {
2551    union tgsi_immediate_data imm[4];
2552    imm[0].Int = x;
2553    imm[1].Int = y;
2554    imm[2].Int = z;
2555    imm[3].Int = w;
2556    return make_immediate_reg_4(emit, imm);
2557 }
2558 
2559 
2560 /**
2561  * Return a tgsi_full_src_register for an immediate/literal int value
2562  * of the form {value, value, value, value}.
2563  * \sa make_immediate_reg_4() regarding allowed values.
2564  */
2565 static struct tgsi_full_src_register
make_immediate_reg_int(struct svga_shader_emitter_v10 * emit,int value)2566 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
2567 {
2568    union tgsi_immediate_data imm;
2569    imm.Int = value;
2570    return make_immediate_reg(emit, imm);
2571 }
2572 
2573 
2574 static struct tgsi_full_src_register
make_immediate_reg_double(struct svga_shader_emitter_v10 * emit,double value)2575 make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2576 {
2577    struct tgsi_full_src_register reg;
2578    int immpos = find_immediate_dbl(emit, value, value);
2579 
2580    assert(immpos >= 0);
2581 
2582    memset(&reg, 0, sizeof(reg));
2583    reg.Register.File = TGSI_FILE_IMMEDIATE;
2584    reg.Register.Index = immpos;
2585    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2586    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2587    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2588    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2589 
2590    return reg;
2591 }
2592 
2593 
2594 /**
2595  * Allocate space for a union tgsi_immediate_data[4] immediate.
2596  * \return  the index/position of the immediate.
2597  */
2598 static unsigned
alloc_immediate_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2599 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2600                   const union tgsi_immediate_data imm[4])
2601 {
2602    unsigned n = emit->num_immediates++;
2603    assert(!emit->immediates_emitted);
2604    assert(n < ARRAY_SIZE(emit->immediates));
2605    emit->immediates[n][0] = imm[0];
2606    emit->immediates[n][1] = imm[1];
2607    emit->immediates[n][2] = imm[2];
2608    emit->immediates[n][3] = imm[3];
2609    return n;
2610 }
2611 
2612 
2613 /**
2614  * Allocate space for a float[4] immediate.
2615  * \return  the index/position of the immediate.
2616  */
2617 static unsigned
alloc_immediate_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2618 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2619                        float x, float y, float z, float w)
2620 {
2621    union tgsi_immediate_data imm[4];
2622    imm[0].Float = x;
2623    imm[1].Float = y;
2624    imm[2].Float = z;
2625    imm[3].Float = w;
2626    return alloc_immediate_4(emit, imm);
2627 }
2628 
2629 
2630 /**
2631  * Allocate space for an int[4] immediate.
2632  * \return  the index/position of the immediate.
2633  */
2634 static unsigned
alloc_immediate_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2635 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2636                        int x, int y, int z, int w)
2637 {
2638    union tgsi_immediate_data imm[4];
2639    imm[0].Int = x;
2640    imm[1].Int = y;
2641    imm[2].Int = z;
2642    imm[3].Int = w;
2643    return alloc_immediate_4(emit, imm);
2644 }
2645 
2646 
2647 static unsigned
alloc_immediate_double2(struct svga_shader_emitter_v10 * emit,double x,double y)2648 alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2649                         double x, double y)
2650 {
2651    unsigned n = emit->num_immediates++;
2652    assert(!emit->immediates_emitted);
2653    assert(n < ARRAY_SIZE(emit->immediates));
2654    emit->immediates_dbl[n][0] = x;
2655    emit->immediates_dbl[n][1] = y;
2656    return n;
2657 
2658 }
2659 
2660 
2661 /**
2662  * Allocate a shader input to store a system value.
2663  */
2664 static unsigned
alloc_system_value_index(struct svga_shader_emitter_v10 * emit,unsigned index)2665 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2666 {
2667    const unsigned n = emit->linkage.input_map_max + 1 + index;
2668    assert(index < ARRAY_SIZE(emit->system_value_indexes));
2669    emit->system_value_indexes[index] = n;
2670    return n;
2671 }
2672 
2673 
2674 /**
2675  * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2676  */
2677 static boolean
emit_vgpu10_immediate(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_immediate * imm)2678 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2679                       const struct tgsi_full_immediate *imm)
2680 {
2681    /* We don't actually emit any code here.  We just save the
2682     * immediate values and emit them later.
2683     */
2684    alloc_immediate_4(emit, imm->u);
2685    return TRUE;
2686 }
2687 
2688 
2689 /**
2690  * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2691  * containing all the immediate values previously allocated
2692  * with alloc_immediate_4().
2693  */
2694 static boolean
emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 * emit)2695 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2696 {
2697    VGPU10OpcodeToken0 token;
2698 
2699    assert(!emit->immediates_emitted);
2700 
2701    token.value = 0;
2702    token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2703    token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2704 
2705    /* Note: no begin/end_emit_instruction() calls */
2706    emit_dword(emit, token.value);
2707    emit_dword(emit, 2 + 4 * emit->num_immediates);
2708    emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2709 
2710    emit->immediates_emitted = TRUE;
2711 
2712    return TRUE;
2713 }
2714 
2715 
2716 /**
2717  * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2718  * interpolation mode.
2719  * \return a VGPU10_INTERPOLATION_x value
2720  */
2721 static unsigned
translate_interpolation(const struct svga_shader_emitter_v10 * emit,enum tgsi_interpolate_mode interp,enum tgsi_interpolate_loc interpolate_loc)2722 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2723                         enum tgsi_interpolate_mode interp,
2724                         enum tgsi_interpolate_loc interpolate_loc)
2725 {
2726    if (interp == TGSI_INTERPOLATE_COLOR) {
2727       interp = emit->key.fs.flatshade ?
2728          TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2729    }
2730 
2731    switch (interp) {
2732    case TGSI_INTERPOLATE_CONSTANT:
2733       return VGPU10_INTERPOLATION_CONSTANT;
2734    case TGSI_INTERPOLATE_LINEAR:
2735       if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2736          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2737       } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2738                  emit->version >= 41) {
2739          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2740       } else {
2741          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2742       }
2743       break;
2744    case TGSI_INTERPOLATE_PERSPECTIVE:
2745       if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2746          return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2747       } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2748                  emit->version >= 41) {
2749          return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
2750       } else {
2751          return VGPU10_INTERPOLATION_LINEAR;
2752       }
2753       break;
2754    default:
2755       assert(!"Unexpected interpolation mode");
2756       return VGPU10_INTERPOLATION_CONSTANT;
2757    }
2758 }
2759 
2760 
2761 /**
2762  * Translate a TGSI property to VGPU10.
2763  * Don't emit any instructions yet, only need to gather the primitive property
2764  * information.  The output primitive topology might be changed later. The
2765  * final property instructions will be emitted as part of the pre-helper code.
2766  */
2767 static boolean
emit_vgpu10_property(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_property * prop)2768 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
2769                      const struct tgsi_full_property *prop)
2770 {
2771    static const VGPU10_PRIMITIVE primType[] = {
2772       VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
2773       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
2774       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
2775       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
2776       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
2777       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
2778       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
2779       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
2780       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
2781       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
2782       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
2783       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2784       VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2785       VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2786    };
2787 
2788    static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
2789       VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
2790       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
2791       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
2792       VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
2793       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
2794       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
2795       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
2796       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
2797       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
2798       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
2799       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
2800       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2801       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2802       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2803    };
2804 
2805    static const unsigned inputArraySize[] = {
2806       0,       /* VGPU10_PRIMITIVE_UNDEFINED */
2807       1,       /* VGPU10_PRIMITIVE_POINT */
2808       2,       /* VGPU10_PRIMITIVE_LINE */
2809       3,       /* VGPU10_PRIMITIVE_TRIANGLE */
2810       0,
2811       0,
2812       4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
2813       6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
2814    };
2815 
2816    switch (prop->Property.PropertyName) {
2817    case TGSI_PROPERTY_GS_INPUT_PRIM:
2818       assert(prop->u[0].Data < ARRAY_SIZE(primType));
2819       emit->gs.prim_type = primType[prop->u[0].Data];
2820       assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
2821       emit->gs.input_size = inputArraySize[emit->gs.prim_type];
2822       break;
2823 
2824    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2825       assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
2826       emit->gs.prim_topology = primTopology[prop->u[0].Data];
2827       assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
2828       break;
2829 
2830    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
2831       emit->gs.max_out_vertices = prop->u[0].Data;
2832       break;
2833 
2834    case TGSI_PROPERTY_GS_INVOCATIONS:
2835       emit->gs.invocations = prop->u[0].Data;
2836       break;
2837 
2838    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2839    case TGSI_PROPERTY_NEXT_SHADER:
2840    case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
2841       /* no-op */
2842       break;
2843 
2844    case TGSI_PROPERTY_TCS_VERTICES_OUT:
2845       /* This info is already captured in the shader key */
2846       break;
2847 
2848    case TGSI_PROPERTY_TES_PRIM_MODE:
2849       emit->tes.prim_mode = prop->u[0].Data;
2850       break;
2851 
2852    case TGSI_PROPERTY_TES_SPACING:
2853       emit->tes.spacing = prop->u[0].Data;
2854       break;
2855 
2856    case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
2857       emit->tes.vertices_order_cw = prop->u[0].Data;
2858       break;
2859 
2860    case TGSI_PROPERTY_TES_POINT_MODE:
2861       emit->tes.point_mode = prop->u[0].Data;
2862       break;
2863 
2864    default:
2865       debug_printf("Unexpected TGSI property %s\n",
2866                    tgsi_property_names[prop->Property.PropertyName]);
2867    }
2868 
2869    return TRUE;
2870 }
2871 
2872 
2873 static void
emit_property_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,unsigned nData,unsigned data)2874 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
2875                           VGPU10OpcodeToken0 opcode0, unsigned nData,
2876                           unsigned data)
2877 {
2878    begin_emit_instruction(emit);
2879    emit_dword(emit, opcode0.value);
2880    if (nData)
2881       emit_dword(emit, data);
2882    end_emit_instruction(emit);
2883 }
2884 
2885 
2886 /**
2887  * Emit property instructions
2888  */
2889 static void
emit_property_instructions(struct svga_shader_emitter_v10 * emit)2890 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
2891 {
2892    VGPU10OpcodeToken0 opcode0;
2893 
2894    assert(emit->unit == PIPE_SHADER_GEOMETRY);
2895 
2896    /* emit input primitive type declaration */
2897    opcode0.value = 0;
2898    opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
2899    opcode0.primitive = emit->gs.prim_type;
2900    emit_property_instruction(emit, opcode0, 0, 0);
2901 
2902    /* emit max output vertices */
2903    opcode0.value = 0;
2904    opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
2905    emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
2906 
2907    if (emit->version >= 50 && emit->gs.invocations > 0) {
2908       opcode0.value = 0;
2909       opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
2910       emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
2911    }
2912 }
2913 
2914 
2915 /**
2916  * A helper function to declare tessellator domain in a hull shader or
2917  * in the domain shader.
2918  */
2919 static void
emit_tessellator_domain(struct svga_shader_emitter_v10 * emit,enum pipe_prim_type prim_mode)2920 emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
2921                         enum pipe_prim_type prim_mode)
2922 {
2923    VGPU10OpcodeToken0 opcode0;
2924 
2925    opcode0.value = 0;
2926    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
2927    switch (prim_mode) {
2928    case PIPE_PRIM_QUADS:
2929    case PIPE_PRIM_LINES:
2930       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
2931       break;
2932    case PIPE_PRIM_TRIANGLES:
2933       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
2934       break;
2935    default:
2936       debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
2937       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
2938    }
2939    begin_emit_instruction(emit);
2940    emit_dword(emit, opcode0.value);
2941    end_emit_instruction(emit);
2942 }
2943 
2944 
2945 /**
2946  * Emit domain shader declarations.
2947  */
2948 static void
emit_domain_shader_declarations(struct svga_shader_emitter_v10 * emit)2949 emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
2950 {
2951    VGPU10OpcodeToken0 opcode0;
2952 
2953    assert(emit->unit == PIPE_SHADER_TESS_EVAL);
2954 
2955    /* Emit the input control point count */
2956    assert(emit->key.tes.vertices_per_patch >= 0 &&
2957           emit->key.tes.vertices_per_patch <= 32);
2958 
2959    opcode0.value = 0;
2960    opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
2961    opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
2962    begin_emit_instruction(emit);
2963    emit_dword(emit, opcode0.value);
2964    end_emit_instruction(emit);
2965 
2966    emit_tessellator_domain(emit, emit->tes.prim_mode);
2967 }
2968 
2969 
2970 /**
2971  * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
2972  * to implement some instructions.  We pre-allocate those values here
2973  * in the immediate constant buffer.
2974  */
2975 static void
alloc_common_immediates(struct svga_shader_emitter_v10 * emit)2976 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
2977 {
2978    unsigned n = 0;
2979 
2980    emit->common_immediate_pos[n++] =
2981       alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
2982 
2983    if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
2984       emit->common_immediate_pos[n++] =
2985          alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
2986    }
2987 
2988    emit->common_immediate_pos[n++] =
2989       alloc_immediate_int4(emit, 0, 1, 0, -1);
2990 
2991    if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
2992        emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
2993       emit->common_immediate_pos[n++] =
2994          alloc_immediate_int4(emit, 31, 0, 0, 0);
2995    }
2996 
2997    if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
2998        emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
2999        emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
3000       emit->common_immediate_pos[n++] =
3001          alloc_immediate_int4(emit, 32, 0, 0, 0);
3002    }
3003 
3004    if (emit->key.vs.attrib_puint_to_snorm) {
3005       emit->common_immediate_pos[n++] =
3006          alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
3007    }
3008 
3009    if (emit->key.vs.attrib_puint_to_uscaled) {
3010       emit->common_immediate_pos[n++] =
3011          alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
3012    }
3013 
3014    if (emit->key.vs.attrib_puint_to_sscaled) {
3015       emit->common_immediate_pos[n++] =
3016          alloc_immediate_int4(emit, 22, 12, 2, 0);
3017 
3018       emit->common_immediate_pos[n++] =
3019          alloc_immediate_int4(emit, 22, 30, 0, 0);
3020    }
3021 
3022    if (emit->vposition.num_prescale > 1) {
3023       unsigned i;
3024       for (i = 0; i < emit->vposition.num_prescale; i+=4) {
3025          emit->common_immediate_pos[n++] =
3026             alloc_immediate_int4(emit, i, i+1, i+2, i+3);
3027       }
3028    }
3029 
3030    emit->immediates_dbl = (double (*)[2]) emit->immediates;
3031 
3032    if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
3033       emit->common_immediate_pos[n++] =
3034          alloc_immediate_double2(emit, -1.0, -1.0);
3035    }
3036 
3037    if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) {
3038       emit->common_immediate_pos[n++] =
3039          alloc_immediate_double2(emit, 0.0, 0.0);
3040       emit->common_immediate_pos[n++] =
3041          alloc_immediate_double2(emit, 1.0, 1.0);
3042    }
3043 
3044    if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
3045       emit->common_immediate_pos[n++] =
3046          alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
3047    }
3048 
3049    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3050 
3051    unsigned i;
3052 
3053    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
3054       if (emit->key.tex[i].texel_bias) {
3055          /* Replace 0.0f if more immediate float value is needed */
3056          emit->common_immediate_pos[n++] =
3057             alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
3058          break;
3059       }
3060    }
3061 
3062    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3063    emit->num_common_immediates = n;
3064 }
3065 
3066 
3067 /**
3068  * Emit hull shader declarations.
3069 */
3070 static void
emit_hull_shader_declarations(struct svga_shader_emitter_v10 * emit)3071 emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
3072 {
3073    VGPU10OpcodeToken0 opcode0;
3074 
3075    /* Emit the input control point count */
3076    assert(emit->key.tcs.vertices_per_patch > 0 &&
3077           emit->key.tcs.vertices_per_patch <= 32);
3078 
3079    opcode0.value = 0;
3080    opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3081    opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
3082    begin_emit_instruction(emit);
3083    emit_dword(emit, opcode0.value);
3084    end_emit_instruction(emit);
3085 
3086    /* Emit the output control point count */
3087    assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
3088 
3089    opcode0.value = 0;
3090    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
3091    opcode0.controlPointCount = emit->key.tcs.vertices_out;
3092    begin_emit_instruction(emit);
3093    emit_dword(emit, opcode0.value);
3094    end_emit_instruction(emit);
3095 
3096    /* Emit tessellator domain */
3097    emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
3098 
3099    /* Emit tessellator output primitive */
3100    opcode0.value = 0;
3101    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
3102    if (emit->key.tcs.point_mode) {
3103       opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
3104    }
3105    else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
3106       opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
3107    }
3108    else {
3109       assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS ||
3110              emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES);
3111 
3112       if (emit->key.tcs.vertices_order_cw)
3113          opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
3114       else
3115          opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
3116    }
3117    begin_emit_instruction(emit);
3118    emit_dword(emit, opcode0.value);
3119    end_emit_instruction(emit);
3120 
3121    /* Emit tessellator partitioning */
3122    opcode0.value = 0;
3123    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
3124    switch (emit->key.tcs.spacing) {
3125    case PIPE_TESS_SPACING_FRACTIONAL_ODD:
3126       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
3127       break;
3128    case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
3129       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
3130       break;
3131    case PIPE_TESS_SPACING_EQUAL:
3132       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
3133       break;
3134    default:
3135       debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
3136       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
3137    }
3138    begin_emit_instruction(emit);
3139    emit_dword(emit, opcode0.value);
3140    end_emit_instruction(emit);
3141 
3142    /* Declare constant registers */
3143    emit_constant_declaration(emit);
3144 
3145    /* Declare samplers and resources */
3146    emit_sampler_declarations(emit);
3147    emit_resource_declarations(emit);
3148 
3149    alloc_common_immediates(emit);
3150 
3151    int nVertices = emit->key.tcs.vertices_per_patch;
3152    emit->tcs.imm_index =
3153       alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
3154 
3155    /* Now, emit the constant block containing all the immediates
3156     * declared by shader, as well as the extra ones seen above.
3157     */
3158    emit_vgpu10_immediates_block(emit);
3159 
3160 }
3161 
3162 
3163 /**
3164  * A helper function to determine if control point phase is needed.
3165  * Returns TRUE if there is control point output.
3166  */
3167 static boolean
needs_control_point_phase(struct svga_shader_emitter_v10 * emit)3168 needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3169 {
3170    unsigned i;
3171 
3172    assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3173 
3174    /* If output control point count does not match the input count,
3175     * we need a control point phase to explicitly set the output control
3176     * points.
3177     */
3178    if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3179        emit->key.tcs.vertices_out)
3180       return TRUE;
3181 
3182    for (i = 0; i < emit->info.num_outputs; i++) {
3183       switch (emit->info.output_semantic_name[i]) {
3184       case TGSI_SEMANTIC_PATCH:
3185       case TGSI_SEMANTIC_TESSOUTER:
3186       case TGSI_SEMANTIC_TESSINNER:
3187          break;
3188       default:
3189          return TRUE;
3190       }
3191    }
3192    return FALSE;
3193 }
3194 
3195 
3196 /**
3197  * A helper function to add shader signature for passthrough control point
3198  * phase. This signature is also generated for passthrough control point
3199  * phase from HLSL compiler and is needed by Metal Renderer.
3200  */
3201 static void
emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 * emit)3202 emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3203 {
3204    struct svga_shader_signature *sgn = &emit->signature;
3205    SVGA3dDXShaderSignatureEntry *sgnEntry;
3206    unsigned i;
3207 
3208    for (i = 0; i < emit->info.num_inputs; i++) {
3209       unsigned index = emit->linkage.input_map[i];
3210       enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3211 
3212       sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3213 
3214       set_shader_signature_entry(sgnEntry, index,
3215                                  tgsi_semantic_to_sgn_name[sem_name],
3216                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3217                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3218                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3219 
3220       sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3221 
3222       set_shader_signature_entry(sgnEntry, i,
3223                                  tgsi_semantic_to_sgn_name[sem_name],
3224                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3225                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3226                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3227    }
3228 }
3229 
3230 
3231 /**
3232  * A helper function to emit an instruction to start the control point phase
3233  * in the hull shader.
3234  */
3235 static void
emit_control_point_phase_instruction(struct svga_shader_emitter_v10 * emit)3236 emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3237 {
3238    VGPU10OpcodeToken0 opcode0;
3239 
3240    opcode0.value = 0;
3241    opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3242    begin_emit_instruction(emit);
3243    emit_dword(emit, opcode0.value);
3244    end_emit_instruction(emit);
3245 }
3246 
3247 
3248 /**
3249  * Start the hull shader control point phase
3250  */
3251 static boolean
emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 * emit)3252 emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
3253 {
3254    /* If there is no control point output, skip the control point phase. */
3255    if (!needs_control_point_phase(emit)) {
3256       if (!emit->key.tcs.vertices_out) {
3257          /**
3258           * If the tcs does not explicitly generate any control point output
3259           * and the tes does not use any input control point, then
3260           * emit an empty control point phase with zero output control
3261           * point count.
3262           */
3263          emit_control_point_phase_instruction(emit);
3264 
3265          /**
3266           * Since this is an empty control point phase, we will need to
3267           * add input signatures when we parse the tcs again in the
3268           * patch constant phase.
3269           */
3270          emit->tcs.fork_phase_add_signature = TRUE;
3271       }
3272       else {
3273          /**
3274           * Before skipping the control point phase, add the signature for
3275           * the passthrough control point.
3276           */
3277          emit_passthrough_control_point_signature(emit);
3278       }
3279       return FALSE;
3280    }
3281 
3282    /* Start the control point phase in the hull shader */
3283    emit_control_point_phase_instruction(emit);
3284 
3285    /* Declare the output control point ID */
3286    if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
3287       /* Add invocation id declaration if it does not exist */
3288       emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
3289    }
3290 
3291    emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3292                           VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
3293                           VGPU10_OPERAND_INDEX_0D,
3294                           0, 1,
3295                           VGPU10_NAME_UNDEFINED,
3296                           VGPU10_OPERAND_0_COMPONENT, 0,
3297                           0,
3298                           VGPU10_INTERPOLATION_CONSTANT, TRUE,
3299                           SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3300 
3301    if (emit->tcs.prim_id_index != INVALID_INDEX) {
3302       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3303                              VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3304                              VGPU10_OPERAND_INDEX_0D,
3305                              0, 1,
3306                              VGPU10_NAME_UNDEFINED,
3307                              VGPU10_OPERAND_0_COMPONENT,
3308                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3309                              0,
3310                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3311                              SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3312    }
3313 
3314    return TRUE;
3315 }
3316 
3317 
3318 /**
3319  * Start the hull shader patch constant phase and
3320  * do the second pass of the tcs translation and emit
3321  * the relevant declarations and instructions for this phase.
3322  */
3323 static boolean
emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 * emit,struct tgsi_parse_context * parse)3324 emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
3325                                       struct tgsi_parse_context *parse)
3326 {
3327    unsigned inst_number = 0;
3328    boolean ret = TRUE;
3329    VGPU10OpcodeToken0 opcode0;
3330 
3331    emit->skip_instruction = FALSE;
3332 
3333    /* Start the patch constant phase */
3334    opcode0.value = 0;
3335    opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
3336    begin_emit_instruction(emit);
3337    emit_dword(emit, opcode0.value);
3338    end_emit_instruction(emit);
3339 
3340    /* Set the current phase to patch constant phase */
3341    emit->tcs.control_point_phase = FALSE;
3342 
3343    if (emit->tcs.prim_id_index != INVALID_INDEX) {
3344       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3345                              VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3346                              VGPU10_OPERAND_INDEX_0D,
3347                              0, 1,
3348                              VGPU10_NAME_UNDEFINED,
3349                              VGPU10_OPERAND_0_COMPONENT,
3350                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3351                              0,
3352                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3353                              SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3354    }
3355 
3356    /* Emit declarations for this phase */
3357    emit->index_range.required =
3358       emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
3359    emit_tcs_input_declarations(emit);
3360 
3361    if (emit->index_range.start_index != INVALID_INDEX) {
3362       emit_index_range_declaration(emit);
3363    }
3364 
3365    emit->index_range.required =
3366       emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
3367    emit_tcs_output_declarations(emit);
3368 
3369    if (emit->index_range.start_index != INVALID_INDEX) {
3370       emit_index_range_declaration(emit);
3371    }
3372    emit->index_range.required = FALSE;
3373 
3374    emit_temporaries_declaration(emit);
3375 
3376    /* Reset the token position to the first instruction token
3377     * in preparation for the second pass of the shader
3378     */
3379    parse->Position = emit->tcs.instruction_token_pos;
3380 
3381    while (!tgsi_parse_end_of_tokens(parse)) {
3382       tgsi_parse_token(parse);
3383 
3384       assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
3385       ret = emit_vgpu10_instruction(emit, inst_number++,
3386                                     &parse->FullToken.FullInstruction);
3387 
3388       /* Usually this applies to TCS only. If shader is reading output of
3389        * patch constant in fork phase, we should reemit all instructions
3390        * which are writting into ouput of patch constant in fork phase
3391        * to store results into temporaries.
3392        */
3393       if (emit->reemit_instruction) {
3394          assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3395          ret = emit_vgpu10_instruction(emit, inst_number,
3396                                        &parse->FullToken.FullInstruction);
3397       }
3398 
3399       if (!ret)
3400          return FALSE;
3401    }
3402 
3403    return TRUE;
3404 }
3405 
3406 
3407 /**
3408  * Emit index range declaration.
3409  */
3410 static boolean
emit_index_range_declaration(struct svga_shader_emitter_v10 * emit)3411 emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3412 {
3413    if (emit->version < 50)
3414       return TRUE;
3415 
3416    assert(emit->index_range.start_index != INVALID_INDEX);
3417    assert(emit->index_range.count != 0);
3418    assert(emit->index_range.required);
3419    assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3420    assert(emit->index_range.dim != 0);
3421    assert(emit->index_range.size != 0);
3422 
3423    VGPU10OpcodeToken0 opcode0;
3424    VGPU10OperandToken0 operand0;
3425 
3426    opcode0.value = 0;
3427    opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3428 
3429    operand0.value = 0;
3430    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3431    operand0.indexDimension = emit->index_range.dim;
3432    operand0.operandType = emit->index_range.operandType;
3433    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3434    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3435 
3436    if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3437       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3438 
3439    begin_emit_instruction(emit);
3440    emit_dword(emit, opcode0.value);
3441    emit_dword(emit, operand0.value);
3442 
3443    if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3444       emit_dword(emit, emit->index_range.size);
3445       emit_dword(emit, emit->index_range.start_index);
3446       emit_dword(emit, emit->index_range.count);
3447    }
3448    else {
3449       emit_dword(emit, emit->index_range.start_index);
3450       emit_dword(emit, emit->index_range.count);
3451    }
3452 
3453    end_emit_instruction(emit);
3454 
3455    /* Reset fields in emit->index_range struct except
3456     * emit->index_range.required which will be reset afterwards
3457     */
3458    emit->index_range.count = 0;
3459    emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3460    emit->index_range.start_index = INVALID_INDEX;
3461    emit->index_range.size = 0;
3462    emit->index_range.dim = 0;
3463 
3464    return TRUE;
3465 }
3466 
3467 
3468 /**
3469  * Emit a vgpu10 declaration "instruction".
3470  * \param index  the register index
3471  * \param size   array size of the operand. In most cases, it is 1,
3472  *               but for inputs to geometry shader, the array size varies
3473  *               depending on the primitive type.
3474  */
3475 static void
emit_decl_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,VGPU10OperandToken0 operand0,VGPU10NameToken name_token,unsigned index,unsigned size)3476 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3477                       VGPU10OpcodeToken0 opcode0,
3478                       VGPU10OperandToken0 operand0,
3479                       VGPU10NameToken name_token,
3480                       unsigned index, unsigned size)
3481 {
3482    assert(opcode0.opcodeType);
3483    assert(operand0.mask ||
3484           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3485           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3486           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3487           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3488           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3489           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3490           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3491           (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3492 
3493    begin_emit_instruction(emit);
3494    emit_dword(emit, opcode0.value);
3495 
3496    emit_dword(emit, operand0.value);
3497 
3498    if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3499       /* Next token is the index of the register to declare */
3500       emit_dword(emit, index);
3501    }
3502    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3503       /* Next token is the size of the register */
3504       emit_dword(emit, size);
3505 
3506       /* Followed by the index of the register */
3507       emit_dword(emit, index);
3508    }
3509 
3510    if (name_token.value) {
3511       emit_dword(emit, name_token.value);
3512    }
3513 
3514    end_emit_instruction(emit);
3515 }
3516 
3517 
3518 /**
3519  * Emit the declaration for a shader input.
3520  * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
3521  * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
3522  * \param dim         index dimension
3523  * \param index       the input register index
3524  * \param size        array size of the operand. In most cases, it is 1,
3525  *                    but for inputs to geometry shader, the array size varies
3526  *                    depending on the primitive type. For tessellation control
3527  *                    shader, the array size is the vertex count per patch.
3528  * \param name        one of VGPU10_NAME_x
3529  * \parma numComp     number of components
3530  * \param selMode     component selection mode
3531  * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3532  * \param interpMode  interpolation mode
3533  */
3534 static void
emit_input_declaration(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcodeType,VGPU10_OPERAND_TYPE operandType,VGPU10_OPERAND_INDEX_DIMENSION dim,unsigned index,unsigned size,VGPU10_SYSTEM_NAME name,VGPU10_OPERAND_NUM_COMPONENTS numComp,VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,unsigned usageMask,VGPU10_INTERPOLATION_MODE interpMode,boolean addSignature,SVGA3dDXSignatureSemanticName sgnName)3535 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
3536                        VGPU10_OPCODE_TYPE opcodeType,
3537                        VGPU10_OPERAND_TYPE operandType,
3538                        VGPU10_OPERAND_INDEX_DIMENSION dim,
3539                        unsigned index, unsigned size,
3540                        VGPU10_SYSTEM_NAME name,
3541                        VGPU10_OPERAND_NUM_COMPONENTS numComp,
3542                        VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
3543                        unsigned usageMask,
3544                        VGPU10_INTERPOLATION_MODE interpMode,
3545                        boolean addSignature,
3546                        SVGA3dDXSignatureSemanticName sgnName)
3547 {
3548    VGPU10OpcodeToken0 opcode0;
3549    VGPU10OperandToken0 operand0;
3550    VGPU10NameToken name_token;
3551 
3552    assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3553    assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
3554           opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
3555           opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
3556           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
3557           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
3558           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
3559    assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
3560           operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
3561           operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
3562           operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
3563           operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
3564           operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
3565           operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
3566           operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
3567           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
3568           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
3569           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
3570 
3571    assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
3572    assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
3573    assert(dim <= VGPU10_OPERAND_INDEX_3D);
3574    assert(name == VGPU10_NAME_UNDEFINED ||
3575           name == VGPU10_NAME_POSITION ||
3576           name == VGPU10_NAME_INSTANCE_ID ||
3577           name == VGPU10_NAME_VERTEX_ID ||
3578           name == VGPU10_NAME_PRIMITIVE_ID ||
3579           name == VGPU10_NAME_IS_FRONT_FACE ||
3580           name == VGPU10_NAME_SAMPLE_INDEX ||
3581           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3582           name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
3583 
3584    assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
3585           interpMode == VGPU10_INTERPOLATION_CONSTANT ||
3586           interpMode == VGPU10_INTERPOLATION_LINEAR ||
3587           interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
3588           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
3589           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
3590           interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
3591           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
3592 
3593    check_register_index(emit, opcodeType, index);
3594 
3595    opcode0.value = operand0.value = name_token.value = 0;
3596 
3597    opcode0.opcodeType = opcodeType;
3598    opcode0.interpolationMode = interpMode;
3599 
3600    operand0.operandType = operandType;
3601    operand0.numComponents = numComp;
3602    operand0.selectionMode = selMode;
3603    operand0.mask = usageMask;
3604    operand0.indexDimension = dim;
3605    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3606    if (dim == VGPU10_OPERAND_INDEX_2D)
3607       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3608 
3609    name_token.name = name;
3610 
3611    emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
3612 
3613    if (addSignature) {
3614       struct svga_shader_signature *sgn = &emit->signature;
3615       if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
3616          /* Set patch constant signature */
3617          SVGA3dDXShaderSignatureEntry *sgnEntry =
3618             &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
3619          set_shader_signature_entry(sgnEntry, index,
3620                                     sgnName, usageMask,
3621                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3622                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3623 
3624       } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
3625                  operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
3626          /* Set input signature */
3627          SVGA3dDXShaderSignatureEntry *sgnEntry =
3628             &sgn->inputs[sgn->header.numInputSignatures++];
3629          set_shader_signature_entry(sgnEntry, index,
3630                                     sgnName, usageMask,
3631                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3632                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3633       }
3634    }
3635 
3636    if (emit->index_range.required) {
3637       /* Here, index_range declaration is only applicable for opcodeType
3638        * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
3639        * for operandType VGPU10_OPERAND_TYPE_INPUT,
3640        * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
3641        * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
3642        */
3643       if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
3644            opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
3645           (operandType != VGPU10_OPERAND_TYPE_INPUT &&
3646            operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
3647            operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
3648          if (emit->index_range.start_index != INVALID_INDEX) {
3649             emit_index_range_declaration(emit);
3650          }
3651          return;
3652       }
3653 
3654       if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3655          /* Need record new index_range */
3656          emit->index_range.count = 1;
3657          emit->index_range.operandType = operandType;
3658          emit->index_range.start_index = index;
3659          emit->index_range.size = size;
3660          emit->index_range.dim = dim;
3661       }
3662       else if (index !=
3663                (emit->index_range.start_index + emit->index_range.count) ||
3664                emit->index_range.operandType != operandType) {
3665          /* Input index is not contiguous with index range or operandType is
3666           * different from index range's operandType. We need to emit current
3667           * index_range first and then start recording next index range.
3668           */
3669          emit_index_range_declaration(emit);
3670 
3671          emit->index_range.count = 1;
3672          emit->index_range.operandType = operandType;
3673          emit->index_range.start_index = index;
3674          emit->index_range.size = size;
3675          emit->index_range.dim = dim;
3676       }
3677       else if (emit->index_range.operandType == operandType) {
3678          /* Since input index is contiguous with index range and operandType
3679           * is same as index range's operandType, increment index range count.
3680           */
3681          emit->index_range.count++;
3682       }
3683    }
3684 }
3685 
3686 
3687 /**
3688  * Emit the declaration for a shader output.
3689  * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
3690  * \param index  the output register index
3691  * \param name  one of VGPU10_NAME_x
3692  * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3693  */
3694 static void
emit_output_declaration(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE type,unsigned index,VGPU10_SYSTEM_NAME name,unsigned writemask,boolean addSignature,SVGA3dDXSignatureSemanticName sgnName)3695 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
3696                         VGPU10_OPCODE_TYPE type, unsigned index,
3697                         VGPU10_SYSTEM_NAME name,
3698                         unsigned writemask,
3699                         boolean addSignature,
3700                         SVGA3dDXSignatureSemanticName sgnName)
3701 {
3702    VGPU10OpcodeToken0 opcode0;
3703    VGPU10OperandToken0 operand0;
3704    VGPU10NameToken name_token;
3705 
3706    assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3707    assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
3708           type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
3709           type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
3710    assert(name == VGPU10_NAME_UNDEFINED ||
3711           name == VGPU10_NAME_POSITION ||
3712           name == VGPU10_NAME_PRIMITIVE_ID ||
3713           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3714           name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
3715           name == VGPU10_NAME_CLIP_DISTANCE);
3716 
3717    check_register_index(emit, type, index);
3718 
3719    opcode0.value = operand0.value = name_token.value = 0;
3720 
3721    opcode0.opcodeType = type;
3722    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3723    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3724    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
3725    operand0.mask = writemask;
3726    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
3727    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3728 
3729    name_token.name = name;
3730 
3731    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
3732 
3733    /* Capture output signature */
3734    if (addSignature) {
3735       struct svga_shader_signature *sgn = &emit->signature;
3736       SVGA3dDXShaderSignatureEntry *sgnEntry =
3737          &sgn->outputs[sgn->header.numOutputSignatures++];
3738       set_shader_signature_entry(sgnEntry, index,
3739                                  sgnName, writemask,
3740                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3741                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3742    }
3743 
3744    if (emit->index_range.required) {
3745       /* Here, index_range declaration is only applicable for opcodeType
3746        * VGPU10_OPCODE_DCL_OUTPUT and for operandType
3747        * VGPU10_OPERAND_TYPE_OUTPUT.
3748        */
3749       if (type != VGPU10_OPCODE_DCL_OUTPUT) {
3750          if (emit->index_range.start_index != INVALID_INDEX) {
3751             emit_index_range_declaration(emit);
3752          }
3753          return;
3754       }
3755 
3756       if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3757          /* Need record new index_range */
3758          emit->index_range.count = 1;
3759          emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3760          emit->index_range.start_index = index;
3761          emit->index_range.size = 1;
3762          emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3763       }
3764       else if (index !=
3765                (emit->index_range.start_index + emit->index_range.count)) {
3766          /* Output index is not contiguous with index range. We need to
3767           * emit current index_range first and then start recording next
3768           * index range.
3769           */
3770          emit_index_range_declaration(emit);
3771 
3772          emit->index_range.count = 1;
3773          emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3774          emit->index_range.start_index = index;
3775          emit->index_range.size = 1;
3776          emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3777       }
3778       else {
3779          /* Since output index is contiguous with index range, increment
3780           * index range count.
3781           */
3782          emit->index_range.count++;
3783       }
3784    }
3785 }
3786 
3787 
3788 /**
3789  * Emit the declaration for the fragment depth output.
3790  */
3791 static void
emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 * emit)3792 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
3793 {
3794    VGPU10OpcodeToken0 opcode0;
3795    VGPU10OperandToken0 operand0;
3796    VGPU10NameToken name_token;
3797 
3798    assert(emit->unit == PIPE_SHADER_FRAGMENT);
3799 
3800    opcode0.value = operand0.value = name_token.value = 0;
3801 
3802    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3803    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
3804    operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
3805    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3806    operand0.mask = 0;
3807 
3808    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3809 }
3810 
3811 
3812 /**
3813  * Emit the declaration for the fragment sample mask/coverage output.
3814  */
3815 static void
emit_samplemask_output_declaration(struct svga_shader_emitter_v10 * emit)3816 emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
3817 {
3818    VGPU10OpcodeToken0 opcode0;
3819    VGPU10OperandToken0 operand0;
3820    VGPU10NameToken name_token;
3821 
3822    assert(emit->unit == PIPE_SHADER_FRAGMENT);
3823    assert(emit->version >= 41);
3824 
3825    opcode0.value = operand0.value = name_token.value = 0;
3826 
3827    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3828    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
3829    operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
3830    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3831    operand0.mask = 0;
3832 
3833    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3834 }
3835 
3836 
3837 /**
3838  * Emit output declarations for fragment shader.
3839  */
3840 static void
emit_fs_output_declarations(struct svga_shader_emitter_v10 * emit)3841 emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
3842 {
3843    unsigned int i;
3844 
3845    for (i = 0; i < emit->info.num_outputs; i++) {
3846       /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
3847       const enum tgsi_semantic semantic_name =
3848          emit->info.output_semantic_name[i];
3849       const unsigned semantic_index = emit->info.output_semantic_index[i];
3850       unsigned index = i;
3851 
3852       if (semantic_name == TGSI_SEMANTIC_COLOR) {
3853          assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
3854 
3855          emit->fs.color_out_index[semantic_index] = index;
3856 
3857          emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
3858                                               index + 1);
3859 
3860          /* The semantic index is the shader's color output/buffer index */
3861          emit_output_declaration(emit,
3862                                  VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
3863                                  VGPU10_NAME_UNDEFINED,
3864                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3865                                  TRUE,
3866                                  map_tgsi_semantic_to_sgn_name(semantic_name));
3867 
3868          if (semantic_index == 0) {
3869             if (emit->key.fs.write_color0_to_n_cbufs > 1) {
3870                /* Emit declarations for the additional color outputs
3871                 * for broadcasting.
3872                 */
3873                unsigned j;
3874                for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
3875                   /* Allocate a new output index */
3876                   unsigned idx = emit->info.num_outputs + j - 1;
3877                   emit->fs.color_out_index[j] = idx;
3878                   emit_output_declaration(emit,
3879                                         VGPU10_OPCODE_DCL_OUTPUT, idx,
3880                                         VGPU10_NAME_UNDEFINED,
3881                                         VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3882                                         TRUE,
3883                                         map_tgsi_semantic_to_sgn_name(semantic_name));
3884                   emit->info.output_semantic_index[idx] = j;
3885                }
3886 
3887                emit->fs.num_color_outputs =
3888                      emit->key.fs.write_color0_to_n_cbufs;
3889             }
3890          }
3891       }
3892       else if (semantic_name == TGSI_SEMANTIC_POSITION) {
3893          /* Fragment depth output */
3894          emit_fragdepth_output_declaration(emit);
3895       }
3896       else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
3897          /* Sample mask output */
3898          emit_samplemask_output_declaration(emit);
3899       }
3900       else {
3901          assert(!"Bad output semantic name");
3902       }
3903    }
3904 }
3905 
3906 
3907 /**
3908  * Emit common output declaration for vertex processing.
3909  */
3910 static void
emit_vertex_output_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned writemask,boolean addSignature)3911 emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
3912                                unsigned index, unsigned writemask,
3913                                boolean addSignature)
3914 {
3915    const enum tgsi_semantic semantic_name =
3916          emit->info.output_semantic_name[index];
3917    const unsigned semantic_index = emit->info.output_semantic_index[index];
3918    unsigned name, type;
3919    unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3920 
3921    assert(emit->unit != PIPE_SHADER_FRAGMENT &&
3922           emit->unit != PIPE_SHADER_COMPUTE);
3923 
3924    switch (semantic_name) {
3925    case TGSI_SEMANTIC_POSITION:
3926       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
3927          /* position will be declared in control point only */
3928          assert(emit->tcs.control_point_phase);
3929          type = VGPU10_OPCODE_DCL_OUTPUT;
3930          name = VGPU10_NAME_UNDEFINED;
3931          emit_output_declaration(emit, type, index, name, final_mask, TRUE,
3932                                  SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3933          return;
3934       }
3935       else {
3936          type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3937          name = VGPU10_NAME_POSITION;
3938       }
3939       /* Save the index of the vertex position output register */
3940       emit->vposition.out_index = index;
3941       break;
3942    case TGSI_SEMANTIC_CLIPDIST:
3943       type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3944       name = VGPU10_NAME_CLIP_DISTANCE;
3945       /* save the starting index of the clip distance output register */
3946       if (semantic_index == 0)
3947          emit->clip_dist_out_index = index;
3948       final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
3949       if (final_mask == 0x0)
3950          return; /* discard this do-nothing declaration */
3951       break;
3952    case TGSI_SEMANTIC_CLIPVERTEX:
3953       type = VGPU10_OPCODE_DCL_OUTPUT;
3954       name = VGPU10_NAME_UNDEFINED;
3955       emit->clip_vertex_out_index = index;
3956       break;
3957    default:
3958       /* generic output */
3959       type = VGPU10_OPCODE_DCL_OUTPUT;
3960       name = VGPU10_NAME_UNDEFINED;
3961    }
3962 
3963    emit_output_declaration(emit, type, index, name, final_mask, addSignature,
3964                            map_tgsi_semantic_to_sgn_name(semantic_name));
3965 }
3966 
3967 
3968 /**
3969  * Emit declaration for outputs in vertex shader.
3970  */
3971 static void
emit_vs_output_declarations(struct svga_shader_emitter_v10 * emit)3972 emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
3973 {
3974    unsigned i;
3975    for (i = 0; i < emit->info.num_outputs; i++) {
3976       emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
3977    }
3978 }
3979 
3980 
3981 /**
3982  * A helper function to determine the writemask for an output
3983  * for the specified stream.
3984  */
3985 static unsigned
output_writemask_for_stream(unsigned stream,ubyte output_streams,ubyte output_usagemask)3986 output_writemask_for_stream(unsigned stream, ubyte output_streams,
3987                                  ubyte output_usagemask)
3988 {
3989    unsigned i;
3990    unsigned writemask = 0;
3991 
3992    for (i = 0; i < 4; i++) {
3993       if ((output_streams & 0x3) == stream)
3994          writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
3995       output_streams >>= 2;
3996    }
3997    return writemask & output_usagemask;
3998 }
3999 
4000 
4001 /**
4002  * Emit declaration for outputs in geometry shader.
4003  */
4004 static void
emit_gs_output_declarations(struct svga_shader_emitter_v10 * emit)4005 emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
4006 {
4007    unsigned i;
4008    VGPU10OpcodeToken0 opcode0;
4009    unsigned numStreamsSupported = 1;
4010    int s;
4011 
4012    if (emit->version >= 50) {
4013       numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
4014    }
4015 
4016    /**
4017     * Start emitting from the last stream first, so we end with
4018     * stream 0, so any of the auxiliary output declarations will
4019     * go to stream 0.
4020     */
4021    for (s = numStreamsSupported-1; s >= 0; s--) {
4022 
4023       if (emit->info.num_stream_output_components[s] == 0)
4024          continue;
4025 
4026       if (emit->version >= 50) {
4027          /* DCL_STREAM stream */
4028          begin_emit_instruction(emit);
4029          emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE);
4030          emit_stream_register(emit, s);
4031          end_emit_instruction(emit);
4032       }
4033 
4034       /* emit output primitive topology declaration */
4035       opcode0.value = 0;
4036       opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
4037       opcode0.primitiveTopology = emit->gs.prim_topology;
4038       emit_property_instruction(emit, opcode0, 0, 0);
4039 
4040       for (i = 0; i < emit->info.num_outputs; i++) {
4041          unsigned writemask;
4042 
4043          /* find out the writemask for this stream */
4044          writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
4045                                                  emit->output_usage_mask[i]);
4046 
4047          if (writemask) {
4048             enum tgsi_semantic semantic_name =
4049                emit->info.output_semantic_name[i];
4050 
4051             /* TODO: Still need to take care of a special case where a
4052              *       single varying spans across multiple output registers.
4053              */
4054             switch(semantic_name) {
4055             case TGSI_SEMANTIC_PRIMID:
4056                emit_output_declaration(emit,
4057                                        VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
4058                                        VGPU10_NAME_PRIMITIVE_ID,
4059                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4060                                        FALSE,
4061                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4062                break;
4063             case TGSI_SEMANTIC_LAYER:
4064                emit_output_declaration(emit,
4065                                        VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4066                                        VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
4067                                        VGPU10_OPERAND_4_COMPONENT_MASK_X,
4068                                        FALSE,
4069                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4070                break;
4071             case TGSI_SEMANTIC_VIEWPORT_INDEX:
4072                emit_output_declaration(emit,
4073                                        VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4074                                        VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
4075                                        VGPU10_OPERAND_4_COMPONENT_MASK_X,
4076                                        FALSE,
4077                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4078                emit->gs.viewport_index_out_index = i;
4079                break;
4080             default:
4081                emit_vertex_output_declaration(emit, i, writemask, FALSE);
4082             }
4083          }
4084       }
4085    }
4086 
4087    /* For geometry shader outputs, it is possible the same register is
4088     * declared multiple times for different streams. So to avoid
4089     * redundant signature entries, geometry shader output signature is done
4090     * outside of the declaration.
4091     */
4092    struct svga_shader_signature *sgn = &emit->signature;
4093    SVGA3dDXShaderSignatureEntry *sgnEntry;
4094 
4095    for (i = 0; i < emit->info.num_outputs; i++) {
4096       if (emit->output_usage_mask[i]) {
4097          enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];
4098 
4099          sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
4100          set_shader_signature_entry(sgnEntry, i,
4101                                     map_tgsi_semantic_to_sgn_name(sem_name),
4102                                     emit->output_usage_mask[i],
4103                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4104                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4105       }
4106    }
4107 }
4108 
4109 
4110 /**
4111  * Emit the declaration for the tess inner/outer output.
4112  * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4113  * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4114  * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4115  */
4116 static void
emit_tesslevel_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned opcodeType,unsigned operandType,VGPU10_SYSTEM_NAME name,SVGA3dDXSignatureSemanticName sgnName)4117 emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4118                            unsigned index, unsigned opcodeType,
4119                            unsigned operandType, VGPU10_SYSTEM_NAME name,
4120                            SVGA3dDXSignatureSemanticName sgnName)
4121 {
4122    VGPU10OpcodeToken0 opcode0;
4123    VGPU10OperandToken0 operand0;
4124    VGPU10NameToken name_token;
4125 
4126    assert(emit->version >= 50);
4127    assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4128           (emit->key.tcs.prim_mode == PIPE_PRIM_LINES &&
4129            name == VGPU10_NAME_UNDEFINED));
4130    assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4131 
4132    assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4133           operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4134 
4135    opcode0.value = operand0.value = name_token.value = 0;
4136 
4137    opcode0.opcodeType = opcodeType;
4138    operand0.operandType = operandType;
4139    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4140    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4141    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4142    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4143    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4144 
4145    name_token.name = name;
4146    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4147 
4148    /* Capture patch constant signature */
4149    struct svga_shader_signature *sgn = &emit->signature;
4150    SVGA3dDXShaderSignatureEntry *sgnEntry =
4151       &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4152    set_shader_signature_entry(sgnEntry, index,
4153                               sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4154                               SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4155                               SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4156 }
4157 
4158 
4159 /**
4160  * Emit output declarations for tessellation control shader.
4161  */
4162 static void
emit_tcs_output_declarations(struct svga_shader_emitter_v10 * emit)4163 emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
4164 {
4165    unsigned int i;
4166    unsigned outputIndex = emit->num_outputs;
4167    struct svga_shader_signature *sgn = &emit->signature;
4168 
4169    /**
4170     * Initialize patch_generic_out_count so it won't be counted twice
4171     * since this function is called twice, one for control point phase
4172     * and another time for patch constant phase.
4173     */
4174    emit->tcs.patch_generic_out_count = 0;
4175 
4176    for (i = 0; i < emit->info.num_outputs; i++) {
4177       unsigned index = i;
4178       const enum tgsi_semantic semantic_name =
4179          emit->info.output_semantic_name[i];
4180 
4181       switch (semantic_name) {
4182       case TGSI_SEMANTIC_TESSINNER:
4183          emit->tcs.inner.tgsi_index = i;
4184 
4185          /* skip per-patch output declarations in control point phase */
4186          if (emit->tcs.control_point_phase)
4187             break;
4188 
4189          emit->tcs.inner.out_index = outputIndex;
4190          switch (emit->key.tcs.prim_mode) {
4191          case PIPE_PRIM_QUADS:
4192             emit_tesslevel_declaration(emit, outputIndex++,
4193                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4194                VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4195                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4196 
4197             emit_tesslevel_declaration(emit, outputIndex++,
4198                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4199                VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4200                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4201             break;
4202          case PIPE_PRIM_TRIANGLES:
4203             emit_tesslevel_declaration(emit, outputIndex++,
4204                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4205                VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4206                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4207             break;
4208          case PIPE_PRIM_LINES:
4209             break;
4210          default:
4211             debug_printf("Unsupported primitive type");
4212          }
4213          break;
4214 
4215       case TGSI_SEMANTIC_TESSOUTER:
4216          emit->tcs.outer.tgsi_index = i;
4217 
4218          /* skip per-patch output declarations in control point phase */
4219          if (emit->tcs.control_point_phase)
4220             break;
4221 
4222          emit->tcs.outer.out_index = outputIndex;
4223          switch (emit->key.tcs.prim_mode) {
4224          case PIPE_PRIM_QUADS:
4225             for (int j = 0; j < 4; j++) {
4226                emit_tesslevel_declaration(emit, outputIndex++,
4227                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4228                   VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
4229                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
4230             }
4231             break;
4232          case PIPE_PRIM_TRIANGLES:
4233             for (int j = 0; j < 3; j++) {
4234                emit_tesslevel_declaration(emit, outputIndex++,
4235                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4236                   VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
4237                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
4238             }
4239             break;
4240          case PIPE_PRIM_LINES:
4241             for (int j = 0; j < 2; j++) {
4242                emit_tesslevel_declaration(emit, outputIndex++,
4243                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4244                   VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
4245                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
4246             }
4247             break;
4248          default:
4249             debug_printf("Unsupported primitive type");
4250          }
4251          break;
4252 
4253       case TGSI_SEMANTIC_PATCH:
4254          if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
4255             emit->tcs.patch_generic_out_index= i;
4256          emit->tcs.patch_generic_out_count++;
4257 
4258          /* skip per-patch output declarations in control point phase */
4259          if (emit->tcs.control_point_phase)
4260             break;
4261 
4262          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
4263                                  VGPU10_NAME_UNDEFINED,
4264                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4265                                  FALSE,
4266                                  map_tgsi_semantic_to_sgn_name(semantic_name));
4267 
4268          SVGA3dDXShaderSignatureEntry *sgnEntry =
4269             &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4270          set_shader_signature_entry(sgnEntry, index,
4271                                     map_tgsi_semantic_to_sgn_name(semantic_name),
4272                                     VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4273                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4274                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4275 
4276          break;
4277 
4278       default:
4279          /* save the starting index of control point outputs */
4280          if (emit->tcs.control_point_out_index == INVALID_INDEX)
4281             emit->tcs.control_point_out_index = i;
4282          emit->tcs.control_point_out_count++;
4283 
4284          /* skip control point output declarations in patch constant phase */
4285          if (!emit->tcs.control_point_phase)
4286             break;
4287 
4288          emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
4289                                         TRUE);
4290 
4291       }
4292    }
4293 
4294    if (emit->tcs.control_point_phase) {
4295       /**
4296        * Add missing control point output in control point phase.
4297        */
4298       if (emit->tcs.control_point_out_index == INVALID_INDEX) {
4299          /* use register index after tessellation factors */
4300          switch (emit->key.tcs.prim_mode) {
4301          case PIPE_PRIM_QUADS:
4302             emit->tcs.control_point_out_index = outputIndex + 6;
4303             break;
4304          case PIPE_PRIM_TRIANGLES:
4305             emit->tcs.control_point_out_index = outputIndex + 4;
4306             break;
4307          default:
4308             emit->tcs.control_point_out_index = outputIndex + 2;
4309             break;
4310          }
4311          emit->tcs.control_point_out_count++;
4312          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
4313                                  emit->tcs.control_point_out_index,
4314                                  VGPU10_NAME_POSITION,
4315                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4316                                  TRUE,
4317                                  SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4318 
4319          /* If tcs does not output any control point output,
4320           * we can end the hull shader control point phase here
4321           * after emitting the default control point output.
4322           */
4323          emit->skip_instruction = TRUE;
4324       }
4325    }
4326    else {
4327       if (emit->tcs.outer.out_index == INVALID_INDEX) {
4328          /* since the TCS did not declare out outer tess level output register,
4329           * we declare it here for patch constant phase only.
4330           */
4331          emit->tcs.outer.out_index = outputIndex;
4332          if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4333             for (int i = 0; i < 4; i++) {
4334                emit_tesslevel_declaration(emit, outputIndex++,
4335                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4336                   VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4337                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4338             }
4339          }
4340          else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4341             for (int i = 0; i < 3; i++) {
4342                emit_tesslevel_declaration(emit, outputIndex++,
4343                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4344                   VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
4345                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
4346             }
4347          }
4348       }
4349 
4350       if (emit->tcs.inner.out_index == INVALID_INDEX) {
4351          /* since the TCS did not declare out inner tess level output register,
4352           * we declare it here
4353           */
4354          emit->tcs.inner.out_index = outputIndex;
4355          if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4356             emit_tesslevel_declaration(emit, outputIndex++,
4357                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4358                VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4359                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4360             emit_tesslevel_declaration(emit, outputIndex++,
4361                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4362                VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4363                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4364          }
4365          else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4366             emit_tesslevel_declaration(emit, outputIndex++,
4367                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4368                VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4369                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4370          }
4371       }
4372    }
4373    emit->num_outputs = outputIndex;
4374 }
4375 
4376 
4377 /**
4378  * Emit output declarations for tessellation evaluation shader.
4379  */
4380 static void
emit_tes_output_declarations(struct svga_shader_emitter_v10 * emit)4381 emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4382 {
4383    unsigned int i;
4384 
4385    for (i = 0; i < emit->info.num_outputs; i++) {
4386       emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
4387    }
4388 }
4389 
4390 
4391 /**
4392  * Emit the declaration for a system value input/output.
4393  */
4394 static void
emit_system_value_declaration(struct svga_shader_emitter_v10 * emit,enum tgsi_semantic semantic_name,unsigned index)4395 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
4396                               enum tgsi_semantic semantic_name, unsigned index)
4397 {
4398    switch (semantic_name) {
4399    case TGSI_SEMANTIC_INSTANCEID:
4400       index = alloc_system_value_index(emit, index);
4401       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4402                              VGPU10_OPERAND_TYPE_INPUT,
4403                              VGPU10_OPERAND_INDEX_1D,
4404                              index, 1,
4405                              VGPU10_NAME_INSTANCE_ID,
4406                              VGPU10_OPERAND_4_COMPONENT,
4407                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4408                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4409                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4410                              map_tgsi_semantic_to_sgn_name(semantic_name));
4411       break;
4412    case TGSI_SEMANTIC_VERTEXID:
4413       emit->vs.vertex_id_sys_index = index;
4414       index = alloc_system_value_index(emit, index);
4415       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4416                              VGPU10_OPERAND_TYPE_INPUT,
4417                              VGPU10_OPERAND_INDEX_1D,
4418                              index, 1,
4419                              VGPU10_NAME_VERTEX_ID,
4420                              VGPU10_OPERAND_4_COMPONENT,
4421                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4422                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4423                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4424                              map_tgsi_semantic_to_sgn_name(semantic_name));
4425       break;
4426    case TGSI_SEMANTIC_SAMPLEID:
4427       assert(emit->unit == PIPE_SHADER_FRAGMENT);
4428       emit->fs.sample_id_sys_index = index;
4429       index = alloc_system_value_index(emit, index);
4430       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
4431                              VGPU10_OPERAND_TYPE_INPUT,
4432                              VGPU10_OPERAND_INDEX_1D,
4433                              index, 1,
4434                              VGPU10_NAME_SAMPLE_INDEX,
4435                              VGPU10_OPERAND_4_COMPONENT,
4436                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4437                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4438                              VGPU10_INTERPOLATION_CONSTANT, TRUE,
4439                              map_tgsi_semantic_to_sgn_name(semantic_name));
4440       break;
4441    case TGSI_SEMANTIC_SAMPLEPOS:
4442       /* This system value contains the position of the current sample
4443        * when using per-sample shading.  We implement this by calling
4444        * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4445        * index as the argument.  See emit_sample_position_instructions().
4446        */
4447       assert(emit->version >= 41);
4448       emit->fs.sample_pos_sys_index = index;
4449       index = alloc_system_value_index(emit, index);
4450       break;
4451    case TGSI_SEMANTIC_INVOCATIONID:
4452       /* Note: invocation id input is mapped to different register depending
4453        * on the shader type. In GS, it will be mapped to vGSInstanceID#.
4454        * In TCS, it will be mapped to vOutputControlPointID#.
4455        * Since in both cases, the mapped name is unique rather than
4456        * just a generic input name ("v#"), so there is no need to remap
4457        * the index value.
4458        */
4459       assert(emit->unit == PIPE_SHADER_GEOMETRY ||
4460              emit->unit == PIPE_SHADER_TESS_CTRL);
4461       assert(emit->version >= 50);
4462 
4463       if (emit->unit == PIPE_SHADER_GEOMETRY) {
4464          emit->gs.invocation_id_sys_index = index;
4465          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4466                                 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
4467                                 VGPU10_OPERAND_INDEX_0D,
4468                                 index, 1,
4469                                 VGPU10_NAME_UNDEFINED,
4470                                 VGPU10_OPERAND_0_COMPONENT,
4471                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4472                                 0,
4473                                 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4474                                 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4475       } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4476          /* The emission of the control point id will be done
4477           * in the control point phase in emit_hull_shader_control_point_phase().
4478           */
4479          emit->tcs.invocation_id_sys_index = index;
4480       }
4481       break;
4482    case TGSI_SEMANTIC_SAMPLEMASK:
4483       /* Note: the PS sample mask input has a unique name ("vCoverage#")
4484        * rather than just a generic input name ("v#") so no need to remap the
4485        * index value.
4486        */
4487       assert(emit->unit == PIPE_SHADER_FRAGMENT);
4488       assert(emit->version >= 50);
4489       emit->fs.sample_mask_in_sys_index = index;
4490       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4491                              VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
4492                              VGPU10_OPERAND_INDEX_0D,
4493                              index, 1,
4494                              VGPU10_NAME_UNDEFINED,
4495                              VGPU10_OPERAND_1_COMPONENT,
4496                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4497                              0,
4498                              VGPU10_INTERPOLATION_CONSTANT, TRUE,
4499                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4500       break;
4501    case TGSI_SEMANTIC_TESSCOORD:
4502       assert(emit->version >= 50);
4503 
4504       unsigned usageMask = 0;
4505 
4506       if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4507          usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
4508       }
4509       else if (emit->tes.prim_mode == PIPE_PRIM_LINES ||
4510                emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4511          usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
4512       }
4513 
4514       emit->tes.tesscoord_sys_index = index;
4515       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4516                              VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
4517                              VGPU10_OPERAND_INDEX_0D,
4518                              index, 1,
4519                              VGPU10_NAME_UNDEFINED,
4520                              VGPU10_OPERAND_4_COMPONENT,
4521                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4522                              usageMask,
4523                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4524                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4525       break;
4526    case TGSI_SEMANTIC_TESSINNER:
4527       assert(emit->version >= 50);
4528       emit->tes.inner.tgsi_index = index;
4529       break;
4530    case TGSI_SEMANTIC_TESSOUTER:
4531       assert(emit->version >= 50);
4532       emit->tes.outer.tgsi_index = index;
4533       break;
4534    case TGSI_SEMANTIC_VERTICESIN:
4535       assert(emit->unit == PIPE_SHADER_TESS_CTRL);
4536       assert(emit->version >= 50);
4537 
4538       /* save the system value index */
4539       emit->tcs.vertices_per_patch_index = index;
4540       break;
4541    case TGSI_SEMANTIC_PRIMID:
4542       assert(emit->version >= 50);
4543       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4544          emit->tcs.prim_id_index = index;
4545       }
4546       else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
4547          emit->tes.prim_id_index = index;
4548          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4549                                 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
4550                                 VGPU10_OPERAND_INDEX_0D,
4551                                 index, 1,
4552                                 VGPU10_NAME_UNDEFINED,
4553                                 VGPU10_OPERAND_0_COMPONENT,
4554                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4555                                 0,
4556                                 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4557                                 map_tgsi_semantic_to_sgn_name(semantic_name));
4558       }
4559       break;
4560    default:
4561       debug_printf("unexpected system value semantic index %u / %s\n",
4562                    semantic_name, tgsi_semantic_names[semantic_name]);
4563    }
4564 }
4565 
4566 /**
4567  * Translate a TGSI declaration to VGPU10.
4568  */
4569 static boolean
emit_vgpu10_declaration(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_declaration * decl)4570 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
4571                         const struct tgsi_full_declaration *decl)
4572 {
4573    switch (decl->Declaration.File) {
4574    case TGSI_FILE_INPUT:
4575       /* do nothing - see emit_input_declarations() */
4576       return TRUE;
4577 
4578    case TGSI_FILE_OUTPUT:
4579       assert(decl->Range.First == decl->Range.Last);
4580       emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
4581       return TRUE;
4582 
4583    case TGSI_FILE_TEMPORARY:
4584       /* Don't declare the temps here.  Just keep track of how many
4585        * and emit the declaration later.
4586        */
4587       if (decl->Declaration.Array) {
4588          /* Indexed temporary array.  Save the start index of the array
4589           * and the size of the array.
4590           */
4591          const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
4592          assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
4593 
4594          /* Save this array so we can emit the declaration for it later */
4595          create_temp_array(emit, arrayID, decl->Range.First,
4596                            decl->Range.Last - decl->Range.First + 1,
4597                            decl->Range.First);
4598       }
4599 
4600       /* for all temps, indexed or not, keep track of highest index */
4601       emit->num_shader_temps = MAX2(emit->num_shader_temps,
4602                                     decl->Range.Last + 1);
4603       return TRUE;
4604 
4605    case TGSI_FILE_CONSTANT:
4606       /* Don't declare constants here.  Just keep track and emit later. */
4607       {
4608          unsigned constbuf = 0, num_consts;
4609          if (decl->Declaration.Dimension) {
4610             constbuf = decl->Dim.Index2D;
4611          }
4612          /* We throw an assertion here when, in fact, the shader should never
4613           * have linked due to constbuf index out of bounds, so we shouldn't
4614           * have reached here.
4615           */
4616          assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
4617 
4618          num_consts = MAX2(emit->num_shader_consts[constbuf],
4619                            decl->Range.Last + 1);
4620 
4621          if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
4622             debug_printf("Warning: constant buffer is declared to size [%u]"
4623                          " but [%u] is the limit.\n",
4624                          num_consts,
4625                          VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4626          }
4627          /* The linker doesn't enforce the max UBO size so we clamp here */
4628          emit->num_shader_consts[constbuf] =
4629             MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4630       }
4631       return TRUE;
4632 
4633    case TGSI_FILE_IMMEDIATE:
4634       assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4635       return FALSE;
4636 
4637    case TGSI_FILE_SYSTEM_VALUE:
4638       emit_system_value_declaration(emit, decl->Semantic.Name,
4639                                     decl->Range.First);
4640       return TRUE;
4641 
4642    case TGSI_FILE_SAMPLER:
4643       /* Don't declare samplers here.  Just keep track and emit later. */
4644       emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
4645       return TRUE;
4646 
4647 #if 0
4648    case TGSI_FILE_RESOURCE:
4649       /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
4650       /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
4651       assert(!"TGSI_FILE_RESOURCE not handled yet");
4652       return FALSE;
4653 #endif
4654 
4655    case TGSI_FILE_ADDRESS:
4656       emit->num_address_regs = MAX2(emit->num_address_regs,
4657                                     decl->Range.Last + 1);
4658       return TRUE;
4659 
4660    case TGSI_FILE_SAMPLER_VIEW:
4661       {
4662          unsigned unit = decl->Range.First;
4663          assert(decl->Range.First == decl->Range.Last);
4664          emit->sampler_target[unit] = decl->SamplerView.Resource;
4665 
4666          /* Note: we can ignore YZW return types for now */
4667          emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
4668          emit->sampler_view[unit] = TRUE;
4669       }
4670       return TRUE;
4671 
4672    default:
4673       assert(!"Unexpected type of declaration");
4674       return FALSE;
4675    }
4676 }
4677 
4678 
4679 
4680 /**
4681  * Emit input declarations for fragment shader.
4682  */
4683 static void
emit_fs_input_declarations(struct svga_shader_emitter_v10 * emit)4684 emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
4685 {
4686    unsigned i;
4687 
4688    for (i = 0; i < emit->linkage.num_inputs; i++) {
4689       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4690       unsigned usage_mask = emit->info.input_usage_mask[i];
4691       unsigned index = emit->linkage.input_map[i];
4692       unsigned type, interpolationMode, name;
4693       unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4694 
4695       if (usage_mask == 0)
4696          continue;  /* register is not actually used */
4697 
4698       if (semantic_name == TGSI_SEMANTIC_POSITION) {
4699          /* fragment position input */
4700          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4701          interpolationMode = VGPU10_INTERPOLATION_LINEAR;
4702          name = VGPU10_NAME_POSITION;
4703          if (usage_mask & TGSI_WRITEMASK_W) {
4704             /* we need to replace use of 'w' with '1/w' */
4705             emit->fs.fragcoord_input_index = i;
4706          }
4707       }
4708       else if (semantic_name == TGSI_SEMANTIC_FACE) {
4709          /* fragment front-facing input */
4710          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4711          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4712          name = VGPU10_NAME_IS_FRONT_FACE;
4713          emit->fs.face_input_index = i;
4714       }
4715       else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4716          /* primitive ID */
4717          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4718          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4719          name = VGPU10_NAME_PRIMITIVE_ID;
4720       }
4721       else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
4722          /* sample index / ID */
4723          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4724          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4725          name = VGPU10_NAME_SAMPLE_INDEX;
4726       }
4727       else if (semantic_name == TGSI_SEMANTIC_LAYER) {
4728          /* render target array index */
4729          if (emit->key.fs.layer_to_zero) {
4730             /**
4731              * The shader from the previous stage does not write to layer,
4732              * so reading the layer index in fragment shader should return 0.
4733              */
4734             emit->fs.layer_input_index = i;
4735             continue;
4736          } else {
4737             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4738             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4739             name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
4740             mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4741          }
4742       }
4743       else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
4744          /* viewport index */
4745          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4746          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4747          name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
4748          mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4749       }
4750       else {
4751          /* general fragment input */
4752          type = VGPU10_OPCODE_DCL_INPUT_PS;
4753          interpolationMode =
4754                translate_interpolation(emit,
4755                                        emit->info.input_interpolate[i],
4756                                        emit->info.input_interpolate_loc[i]);
4757 
4758          /* keeps track if flat interpolation mode is being used */
4759          emit->uses_flat_interp = emit->uses_flat_interp ||
4760                (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
4761 
4762          name = VGPU10_NAME_UNDEFINED;
4763       }
4764 
4765       emit_input_declaration(emit, type,
4766                              VGPU10_OPERAND_TYPE_INPUT,
4767                              VGPU10_OPERAND_INDEX_1D, index, 1,
4768                              name,
4769                              VGPU10_OPERAND_4_COMPONENT,
4770                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4771                              mask,
4772                              interpolationMode, TRUE,
4773                              map_tgsi_semantic_to_sgn_name(semantic_name));
4774    }
4775 }
4776 
4777 
4778 /**
4779  * Emit input declarations for vertex shader.
4780  */
4781 static void
emit_vs_input_declarations(struct svga_shader_emitter_v10 * emit)4782 emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
4783 {
4784    unsigned i;
4785 
4786    for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
4787       unsigned usage_mask = emit->info.input_usage_mask[i];
4788       unsigned index = i;
4789 
4790       if (usage_mask == 0)
4791          continue;  /* register is not actually used */
4792 
4793       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4794                              VGPU10_OPERAND_TYPE_INPUT,
4795                              VGPU10_OPERAND_INDEX_1D, index, 1,
4796                              VGPU10_NAME_UNDEFINED,
4797                              VGPU10_OPERAND_4_COMPONENT,
4798                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4799                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4800                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4801                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4802    }
4803 }
4804 
4805 
4806 /**
4807  * Emit input declarations for geometry shader.
4808  */
4809 static void
emit_gs_input_declarations(struct svga_shader_emitter_v10 * emit)4810 emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
4811 {
4812    unsigned i;
4813 
4814    for (i = 0; i < emit->info.num_inputs; i++) {
4815       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4816       unsigned usage_mask = emit->info.input_usage_mask[i];
4817       unsigned index = emit->linkage.input_map[i];
4818       unsigned opcodeType, operandType;
4819       unsigned numComp, selMode;
4820       unsigned name;
4821       unsigned dim;
4822 
4823       if (usage_mask == 0)
4824          continue;  /* register is not actually used */
4825 
4826       opcodeType = VGPU10_OPCODE_DCL_INPUT;
4827       operandType = VGPU10_OPERAND_TYPE_INPUT;
4828       numComp = VGPU10_OPERAND_4_COMPONENT;
4829       selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4830       name = VGPU10_NAME_UNDEFINED;
4831 
4832       /* all geometry shader inputs are two dimensional except
4833        * gl_PrimitiveID
4834        */
4835       dim = VGPU10_OPERAND_INDEX_2D;
4836 
4837       if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4838          /* Primitive ID */
4839          operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
4840          dim = VGPU10_OPERAND_INDEX_0D;
4841          numComp = VGPU10_OPERAND_0_COMPONENT;
4842          selMode = 0;
4843 
4844          /* also save the register index so we can check for
4845           * primitive id when emit src register. We need to modify the
4846           * operand type, index dimension when emit primitive id src reg.
4847           */
4848           emit->gs.prim_id_index = i;
4849       }
4850       else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4851          /* vertex position input */
4852          opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
4853          name = VGPU10_NAME_POSITION;
4854       }
4855 
4856       emit_input_declaration(emit, opcodeType, operandType,
4857                              dim, index,
4858                              emit->gs.input_size,
4859                              name,
4860                              numComp, selMode,
4861                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4862                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4863                              map_tgsi_semantic_to_sgn_name(semantic_name));
4864    }
4865 }
4866 
4867 
4868 /**
4869  * Emit input declarations for tessellation control shader.
4870  */
4871 static void
emit_tcs_input_declarations(struct svga_shader_emitter_v10 * emit)4872 emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
4873 {
4874    unsigned i;
4875    unsigned size = emit->key.tcs.vertices_per_patch;
4876    unsigned indicesMask = 0;
4877    boolean addSignature = TRUE;
4878 
4879    if (!emit->tcs.control_point_phase)
4880       addSignature = emit->tcs.fork_phase_add_signature;
4881 
4882    for (i = 0; i < emit->info.num_inputs; i++) {
4883       unsigned usage_mask = emit->info.input_usage_mask[i];
4884       unsigned index = emit->linkage.input_map[i];
4885       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4886       VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
4887       VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
4888       SVGA3dDXSignatureSemanticName sgn_name =
4889          map_tgsi_semantic_to_sgn_name(semantic_name);
4890 
4891       /* indices that are declared */
4892       indicesMask |= 1 << index;
4893 
4894       if (semantic_name == TGSI_SEMANTIC_POSITION ||
4895           index == emit->linkage.position_index) {
4896          /* save the input control point index for later use */
4897          emit->tcs.control_point_input_index = i;
4898       }
4899       else if (usage_mask == 0) {
4900          continue;  /* register is not actually used */
4901       }
4902       else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
4903          /* The shadow copy is being used here. So set the signature name
4904           * to UNDEFINED.
4905           */
4906          sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
4907       }
4908 
4909       /* input control points in the patch constant phase are emitted in the
4910        * vicp register rather than the v register.
4911        */
4912       if (!emit->tcs.control_point_phase) {
4913          operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
4914       }
4915 
4916       /* Tessellation control shader inputs are two dimensional.
4917        * The array size is determined by the patch vertex count.
4918        */
4919       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4920                              operandType,
4921                              VGPU10_OPERAND_INDEX_2D,
4922                              index, size, name,
4923                              VGPU10_OPERAND_4_COMPONENT,
4924                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4925                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4926                              VGPU10_INTERPOLATION_UNDEFINED,
4927                              addSignature, sgn_name);
4928    }
4929 
4930    if (emit->tcs.control_point_phase) {
4931       if (emit->tcs.control_point_input_index == INVALID_INDEX) {
4932 
4933          /* Add input control point declaration if it does not exist */
4934          if ((indicesMask & (1 << emit->linkage.position_index)) == 0) {
4935             emit->linkage.input_map[emit->linkage.num_inputs] =
4936                emit->linkage.position_index;
4937             emit->tcs.control_point_input_index = emit->linkage.num_inputs++;
4938 
4939             emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4940                                    VGPU10_OPERAND_TYPE_INPUT,
4941                                    VGPU10_OPERAND_INDEX_2D,
4942                                    emit->linkage.position_index,
4943                                    emit->key.tcs.vertices_per_patch,
4944                                    VGPU10_NAME_UNDEFINED,
4945                                    VGPU10_OPERAND_4_COMPONENT,
4946                                    VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4947                                    VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4948                                    VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4949                                    SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4950          }
4951       }
4952 
4953       /* Also add an address register for the indirection to the
4954        * input control points
4955        */
4956       emit->tcs.control_point_addr_index = emit->num_address_regs++;
4957    }
4958 }
4959 
4960 
4961 static void
emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 * emit)4962 emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
4963 {
4964 
4965    /* In tcs, tess factors are emitted as extra outputs.
4966     * The starting register index for the tess factors is captured
4967     * in the compile key.
4968     */
4969    unsigned inputIndex = emit->key.tes.tessfactor_index;
4970 
4971    if (emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4972       if (emit->key.tes.need_tessouter) {
4973          emit->tes.outer.in_index = inputIndex;
4974          for (int i = 0; i < 4; i++) {
4975             emit_tesslevel_declaration(emit, inputIndex++,
4976                VGPU10_OPCODE_DCL_INPUT_SIV,
4977                VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4978                VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4979                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4980          }
4981       }
4982 
4983       if (emit->key.tes.need_tessinner) {
4984          emit->tes.inner.in_index = inputIndex;
4985          emit_tesslevel_declaration(emit, inputIndex++,
4986             VGPU10_OPCODE_DCL_INPUT_SIV,
4987             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4988             VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4989             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4990 
4991          emit_tesslevel_declaration(emit, inputIndex++,
4992             VGPU10_OPCODE_DCL_INPUT_SIV,
4993             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4994             VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4995             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4996       }
4997    }
4998    else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4999       if (emit->key.tes.need_tessouter) {
5000          emit->tes.outer.in_index = inputIndex;
5001          for (int i = 0; i < 3; i++) {
5002             emit_tesslevel_declaration(emit, inputIndex++,
5003                VGPU10_OPCODE_DCL_INPUT_SIV,
5004                VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5005                VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5006                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5007          }
5008       }
5009 
5010       if (emit->key.tes.need_tessinner) {
5011          emit->tes.inner.in_index = inputIndex;
5012          emit_tesslevel_declaration(emit, inputIndex++,
5013             VGPU10_OPCODE_DCL_INPUT_SIV,
5014             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5015             VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5016             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5017       }
5018    }
5019    else if (emit->tes.prim_mode == PIPE_PRIM_LINES) {
5020       if (emit->key.tes.need_tessouter) {
5021          emit->tes.outer.in_index = inputIndex;
5022          emit_tesslevel_declaration(emit, inputIndex++,
5023             VGPU10_OPCODE_DCL_INPUT_SIV,
5024             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5025             VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5026             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5027 
5028          emit_tesslevel_declaration(emit, inputIndex++,
5029             VGPU10_OPCODE_DCL_INPUT_SIV,
5030             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5031             VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5032             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5033       }
5034    }
5035 }
5036 
5037 
5038 /**
5039  * Emit input declarations for tessellation evaluation shader.
5040  */
5041 static void
emit_tes_input_declarations(struct svga_shader_emitter_v10 * emit)5042 emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
5043 {
5044    unsigned i;
5045 
5046    for (i = 0; i < emit->info.num_inputs; i++) {
5047       unsigned usage_mask = emit->info.input_usage_mask[i];
5048       unsigned index = emit->linkage.input_map[i];
5049       unsigned size;
5050       const enum tgsi_semantic semantic_name =
5051          emit->info.input_semantic_name[i];
5052       SVGA3dDXSignatureSemanticName sgn_name;
5053       VGPU10_OPERAND_TYPE operandType;
5054       VGPU10_OPERAND_INDEX_DIMENSION dim;
5055 
5056       if (usage_mask == 0)
5057          usage_mask = 1;  /* at least set usage mask to one */
5058 
5059       if (semantic_name == TGSI_SEMANTIC_PATCH) {
5060          operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
5061          dim = VGPU10_OPERAND_INDEX_1D;
5062          size = 1;
5063          sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
5064       }
5065       else {
5066          operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5067          dim = VGPU10_OPERAND_INDEX_2D;
5068          size = emit->key.tes.vertices_per_patch;
5069          sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5070       }
5071 
5072       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
5073                              dim, index, size, VGPU10_NAME_UNDEFINED,
5074                              VGPU10_OPERAND_4_COMPONENT,
5075                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5076                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5077                              VGPU10_INTERPOLATION_UNDEFINED,
5078                              TRUE, sgn_name);
5079    }
5080 
5081    emit_tessfactor_input_declarations(emit);
5082 
5083    /* DX spec requires DS input controlpoint/patch-constant signatures to match
5084     * the HS output controlpoint/patch-constant signatures exactly.
5085     * Add missing input declarations even if they are not used in the shader.
5086     */
5087    if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
5088       struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
5089       for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
5090 
5091           /* If a tcs output does not have a corresponding input register in
5092            * tes, add one.
5093            */
5094           if (emit->linkage.prevShader.output_map[i] >
5095               emit->linkage.input_map_max) {
5096              const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
5097 
5098              if (sem_name == TGSI_SEMANTIC_PATCH) {
5099                 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5100                                        VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5101                                        VGPU10_OPERAND_INDEX_1D,
5102                                        i, 1, VGPU10_NAME_UNDEFINED,
5103                                        VGPU10_OPERAND_4_COMPONENT,
5104                                        VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5105                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5106                                        VGPU10_INTERPOLATION_UNDEFINED,
5107                                        TRUE,
5108                                        map_tgsi_semantic_to_sgn_name(sem_name));
5109 
5110              } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
5111                         sem_name != TGSI_SEMANTIC_TESSOUTER) {
5112                 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5113                                        VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
5114                                        VGPU10_OPERAND_INDEX_2D,
5115                                        i, emit->key.tes.vertices_per_patch,
5116                                        VGPU10_NAME_UNDEFINED,
5117                                        VGPU10_OPERAND_4_COMPONENT,
5118                                        VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5119                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5120                                        VGPU10_INTERPOLATION_UNDEFINED,
5121                                        TRUE,
5122                                        map_tgsi_semantic_to_sgn_name(sem_name));
5123              }
5124              /* tessellation factors are taken care of in
5125               * emit_tessfactor_input_declarations().
5126               */
5127          }
5128       }
5129    }
5130 }
5131 
5132 
5133 /**
5134  * Emit all input declarations.
5135  */
5136 static boolean
emit_input_declarations(struct svga_shader_emitter_v10 * emit)5137 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5138 {
5139    emit->index_range.required =
5140       emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
5141 
5142    switch (emit->unit) {
5143    case PIPE_SHADER_FRAGMENT:
5144       emit_fs_input_declarations(emit);
5145       break;
5146    case PIPE_SHADER_GEOMETRY:
5147       emit_gs_input_declarations(emit);
5148       break;
5149    case PIPE_SHADER_VERTEX:
5150       emit_vs_input_declarations(emit);
5151       break;
5152    case PIPE_SHADER_TESS_CTRL:
5153       emit_tcs_input_declarations(emit);
5154       break;
5155    case PIPE_SHADER_TESS_EVAL:
5156       emit_tes_input_declarations(emit);
5157       break;
5158    case PIPE_SHADER_COMPUTE:
5159       //XXX emit_cs_input_declarations(emit);
5160       break;
5161    default:
5162       assert(0);
5163    }
5164 
5165    if (emit->index_range.start_index != INVALID_INDEX) {
5166       emit_index_range_declaration(emit);
5167    }
5168    emit->index_range.required = FALSE;
5169    return TRUE;
5170 }
5171 
5172 
5173 /**
5174  * Emit all output declarations.
5175  */
5176 static boolean
emit_output_declarations(struct svga_shader_emitter_v10 * emit)5177 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5178 {
5179    emit->index_range.required =
5180       emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
5181 
5182    switch (emit->unit) {
5183    case PIPE_SHADER_FRAGMENT:
5184       emit_fs_output_declarations(emit);
5185       break;
5186    case PIPE_SHADER_GEOMETRY:
5187       emit_gs_output_declarations(emit);
5188       break;
5189    case PIPE_SHADER_VERTEX:
5190       emit_vs_output_declarations(emit);
5191       break;
5192    case PIPE_SHADER_TESS_CTRL:
5193       emit_tcs_output_declarations(emit);
5194       break;
5195    case PIPE_SHADER_TESS_EVAL:
5196       emit_tes_output_declarations(emit);
5197       break;
5198    case PIPE_SHADER_COMPUTE:
5199       //XXX emit_cs_output_declarations(emit);
5200       break;
5201    default:
5202       assert(0);
5203    }
5204 
5205    if (emit->vposition.so_index != INVALID_INDEX &&
5206        emit->vposition.out_index != INVALID_INDEX) {
5207 
5208       assert(emit->unit != PIPE_SHADER_FRAGMENT);
5209 
5210       /* Emit the declaration for the non-adjusted vertex position
5211        * for stream output purpose
5212        */
5213       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5214                               emit->vposition.so_index,
5215                               VGPU10_NAME_UNDEFINED,
5216                               VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5217                               TRUE,
5218                               SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5219    }
5220 
5221    if (emit->clip_dist_so_index != INVALID_INDEX &&
5222        emit->clip_dist_out_index != INVALID_INDEX) {
5223 
5224       assert(emit->unit != PIPE_SHADER_FRAGMENT);
5225 
5226       /* Emit the declaration for the clip distance shadow copy which
5227        * will be used for stream output purpose and for clip distance
5228        * varying variable
5229        */
5230       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5231                               emit->clip_dist_so_index,
5232                               VGPU10_NAME_UNDEFINED,
5233                               emit->output_usage_mask[emit->clip_dist_out_index],
5234                               TRUE,
5235                               SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5236 
5237       if (emit->info.num_written_clipdistance > 4) {
5238          /* for the second clip distance register, each handles 4 planes */
5239          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5240                                  emit->clip_dist_so_index + 1,
5241                                  VGPU10_NAME_UNDEFINED,
5242                                  emit->output_usage_mask[emit->clip_dist_out_index+1],
5243                                  TRUE,
5244                                  SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5245       }
5246    }
5247 
5248    if (emit->index_range.start_index != INVALID_INDEX) {
5249       emit_index_range_declaration(emit);
5250    }
5251    emit->index_range.required = FALSE;
5252    return TRUE;
5253 }
5254 
5255 
5256 /**
5257  * A helper function to create a temporary indexable array
5258  * and initialize the corresponding entries in the temp_map array.
5259  */
5260 static void
create_temp_array(struct svga_shader_emitter_v10 * emit,unsigned arrayID,unsigned first,unsigned count,unsigned startIndex)5261 create_temp_array(struct svga_shader_emitter_v10 *emit,
5262                   unsigned arrayID, unsigned first, unsigned count,
5263                   unsigned startIndex)
5264 {
5265    unsigned i, tempIndex = startIndex;
5266 
5267    emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5268    assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5269    emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5270 
5271    emit->temp_arrays[arrayID].start = first;
5272    emit->temp_arrays[arrayID].size = count;
5273 
5274    /* Fill in the temp_map entries for this temp array */
5275    for (i = 0; i < count; i++, tempIndex++) {
5276       emit->temp_map[tempIndex].arrayId = arrayID;
5277       emit->temp_map[tempIndex].index = i;
5278    }
5279 }
5280 
5281 
5282 /**
5283  * Emit the declaration for the temporary registers.
5284  */
5285 static boolean
emit_temporaries_declaration(struct svga_shader_emitter_v10 * emit)5286 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
5287 {
5288    unsigned total_temps, reg, i;
5289 
5290    total_temps = emit->num_shader_temps;
5291 
5292    /* If there is indirect access to non-indexable temps in the shader,
5293     * convert those temps to indexable temps. This works around a bug
5294     * in the GLSL->TGSI translator exposed in piglit test
5295     * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
5296     * Internal temps added by the driver remain as non-indexable temps.
5297     */
5298    if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
5299        emit->num_temp_arrays == 0) {
5300       create_temp_array(emit, 1, 0, total_temps, 0);
5301    }
5302 
5303    /* Allocate extra temps for specially-implemented instructions,
5304     * such as LIT.
5305     */
5306    total_temps += MAX_INTERNAL_TEMPS;
5307 
5308    /* Allocate extra temps for clip distance or clip vertex.
5309     */
5310    if (emit->clip_mode == CLIP_DISTANCE) {
5311       /* We need to write the clip distance to a temporary register
5312        * first. Then it will be copied to the shadow copy for
5313        * the clip distance varying variable and stream output purpose.
5314        * It will also be copied to the actual CLIPDIST register
5315        * according to the enabled clip planes
5316        */
5317       emit->clip_dist_tmp_index = total_temps++;
5318       if (emit->info.num_written_clipdistance > 4)
5319          total_temps++; /* second clip register */
5320    }
5321    else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
5322       /* If the current shader is in the last vertex processing stage,
5323        * We need to convert the TGSI CLIPVERTEX output to one or more
5324        * clip distances.  Allocate a temp reg for the clipvertex here.
5325        */
5326       assert(emit->info.writes_clipvertex > 0);
5327       emit->clip_vertex_tmp_index = total_temps;
5328       total_temps++;
5329    }
5330 
5331    if (emit->info.uses_vertexid) {
5332       assert(emit->unit == PIPE_SHADER_VERTEX);
5333       emit->vs.vertex_id_tmp_index = total_temps++;
5334    }
5335 
5336    if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
5337       if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
5338           emit->key.clip_plane_enable ||
5339           emit->vposition.so_index != INVALID_INDEX) {
5340          emit->vposition.tmp_index = total_temps;
5341          total_temps += 1;
5342       }
5343 
5344       if (emit->vposition.need_prescale) {
5345          emit->vposition.prescale_scale_index = total_temps++;
5346          emit->vposition.prescale_trans_index = total_temps++;
5347       }
5348 
5349       if (emit->unit == PIPE_SHADER_VERTEX) {
5350          unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
5351                                  emit->key.vs.adjust_attrib_itof |
5352                                  emit->key.vs.adjust_attrib_utof |
5353                                  emit->key.vs.attrib_is_bgra |
5354                                  emit->key.vs.attrib_puint_to_snorm |
5355                                  emit->key.vs.attrib_puint_to_uscaled |
5356                                  emit->key.vs.attrib_puint_to_sscaled);
5357          while (attrib_mask) {
5358             unsigned index = u_bit_scan(&attrib_mask);
5359             emit->vs.adjusted_input[index] = total_temps++;
5360          }
5361       }
5362       else if (emit->unit == PIPE_SHADER_GEOMETRY) {
5363          if (emit->key.gs.writes_viewport_index)
5364             emit->gs.viewport_index_tmp_index = total_temps++;
5365       }
5366    }
5367    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
5368       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
5369           emit->key.fs.write_color0_to_n_cbufs > 1) {
5370          /* Allocate a temp to hold the output color */
5371          emit->fs.color_tmp_index = total_temps;
5372          total_temps += 1;
5373       }
5374 
5375       if (emit->fs.face_input_index != INVALID_INDEX) {
5376          /* Allocate a temp for the +/-1 face register */
5377          emit->fs.face_tmp_index = total_temps;
5378          total_temps += 1;
5379       }
5380 
5381       if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
5382          /* Allocate a temp for modified fragment position register */
5383          emit->fs.fragcoord_tmp_index = total_temps;
5384          total_temps += 1;
5385       }
5386 
5387       if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
5388          /* Allocate a temp for the sample position */
5389          emit->fs.sample_pos_tmp_index = total_temps++;
5390       }
5391    }
5392    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
5393       if (emit->vposition.need_prescale) {
5394          emit->vposition.tmp_index = total_temps++;
5395          emit->vposition.prescale_scale_index = total_temps++;
5396          emit->vposition.prescale_trans_index = total_temps++;
5397       }
5398 
5399       if (emit->tes.inner.tgsi_index) {
5400          emit->tes.inner.temp_index = total_temps;
5401          total_temps += 1;
5402       }
5403 
5404       if (emit->tes.outer.tgsi_index) {
5405          emit->tes.outer.temp_index = total_temps;
5406          total_temps += 1;
5407       }
5408    }
5409    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
5410       if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
5411          if (!emit->tcs.control_point_phase) {
5412             emit->tcs.inner.temp_index = total_temps;
5413             total_temps += 1;
5414          }
5415       }
5416       if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
5417          if (!emit->tcs.control_point_phase) {
5418             emit->tcs.outer.temp_index = total_temps;
5419             total_temps += 1;
5420          }
5421       }
5422 
5423       if (emit->tcs.control_point_phase &&
5424           emit->info.reads_pervertex_outputs) {
5425          emit->tcs.control_point_tmp_index = total_temps;
5426          total_temps += emit->tcs.control_point_out_count;
5427       }
5428       else if (!emit->tcs.control_point_phase &&
5429                emit->info.reads_perpatch_outputs) {
5430 
5431          /* If there is indirect access to the patch constant outputs
5432           * in the control point phase, then an indexable temporary array
5433           * will be created for these patch constant outputs.
5434           * Note, indirect access can only be applicable to
5435           * patch constant outputs in the control point phase.
5436           */
5437          if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
5438             unsigned arrayID =
5439                emit->num_temp_arrays ? emit->num_temp_arrays : 1;
5440             create_temp_array(emit, arrayID, 0,
5441                               emit->tcs.patch_generic_out_count, total_temps);
5442          }
5443          emit->tcs.patch_generic_tmp_index = total_temps;
5444          total_temps += emit->tcs.patch_generic_out_count;
5445       }
5446 
5447       emit->tcs.invocation_id_tmp_index = total_temps++;
5448    }
5449 
5450    for (i = 0; i < emit->num_address_regs; i++) {
5451       emit->address_reg_index[i] = total_temps++;
5452    }
5453 
5454    /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
5455     * temp indexes.  Basically, we compact all the non-array temp register
5456     * indexes into a consecutive series.
5457     *
5458     * Before, we may have some TGSI declarations like:
5459     *   DCL TEMP[0..1], LOCAL
5460     *   DCL TEMP[2..4], ARRAY(1), LOCAL
5461     *   DCL TEMP[5..7], ARRAY(2), LOCAL
5462     *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
5463     *
5464     * After, we'll have a map like this:
5465     *   temp_map[0] = { array 0, index 0 }
5466     *   temp_map[1] = { array 0, index 1 }
5467     *   temp_map[2] = { array 1, index 0 }
5468     *   temp_map[3] = { array 1, index 1 }
5469     *   temp_map[4] = { array 1, index 2 }
5470     *   temp_map[5] = { array 2, index 0 }
5471     *   temp_map[6] = { array 2, index 1 }
5472     *   temp_map[7] = { array 2, index 2 }
5473     *   temp_map[8] = { array 0, index 2 }
5474     *   temp_map[9] = { array 0, index 3 }
5475     *
5476     * We'll declare two arrays of 3 elements, plus a set of four non-indexed
5477     * temps numbered 0..3
5478     *
5479     * Any time we emit a temporary register index, we'll have to use the
5480     * temp_map[] table to convert the TGSI index to the VGPU10 index.
5481     *
5482     * Finally, we recompute the total_temps value here.
5483     */
5484    reg = 0;
5485    for (i = 0; i < total_temps; i++) {
5486       if (emit->temp_map[i].arrayId == 0) {
5487          emit->temp_map[i].index = reg++;
5488       }
5489    }
5490 
5491    if (0) {
5492       debug_printf("total_temps %u\n", total_temps);
5493       for (i = 0; i < total_temps; i++) {
5494          debug_printf("temp %u ->  array %u  index %u\n",
5495                       i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
5496       }
5497    }
5498 
5499    total_temps = reg;
5500 
5501    /* Emit declaration of ordinary temp registers */
5502    if (total_temps > 0) {
5503       VGPU10OpcodeToken0 opcode0;
5504 
5505       opcode0.value = 0;
5506       opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
5507 
5508       begin_emit_instruction(emit);
5509       emit_dword(emit, opcode0.value);
5510       emit_dword(emit, total_temps);
5511       end_emit_instruction(emit);
5512    }
5513 
5514    /* Emit declarations for indexable temp arrays.  Skip 0th entry since
5515     * it's unused.
5516     */
5517    for (i = 1; i < emit->num_temp_arrays; i++) {
5518       unsigned num_temps = emit->temp_arrays[i].size;
5519 
5520       if (num_temps > 0) {
5521          VGPU10OpcodeToken0 opcode0;
5522 
5523          opcode0.value = 0;
5524          opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
5525 
5526          begin_emit_instruction(emit);
5527          emit_dword(emit, opcode0.value);
5528          emit_dword(emit, i); /* which array */
5529          emit_dword(emit, num_temps);
5530          emit_dword(emit, 4); /* num components */
5531          end_emit_instruction(emit);
5532 
5533          total_temps += num_temps;
5534       }
5535    }
5536 
5537    /* Check that the grand total of all regular and indexed temps is
5538     * under the limit.
5539     */
5540    check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
5541 
5542    return TRUE;
5543 }
5544 
5545 
5546 static boolean
emit_constant_declaration(struct svga_shader_emitter_v10 * emit)5547 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
5548 {
5549    VGPU10OpcodeToken0 opcode0;
5550    VGPU10OperandToken0 operand0;
5551    unsigned total_consts, i;
5552 
5553    opcode0.value = 0;
5554    opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
5555    opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
5556    /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
5557 
5558    operand0.value = 0;
5559    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
5560    operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
5561    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5562    operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5563    operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
5564    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
5565    operand0.swizzleX = 0;
5566    operand0.swizzleY = 1;
5567    operand0.swizzleZ = 2;
5568    operand0.swizzleW = 3;
5569 
5570    /**
5571     * Emit declaration for constant buffer [0].  We also allocate
5572     * room for the extra constants here.
5573     */
5574    total_consts = emit->num_shader_consts[0];
5575 
5576    /* Now, allocate constant slots for the "extra" constants.
5577     * Note: it's critical that these extra constant locations
5578     * exactly match what's emitted by the "extra" constants code
5579     * in svga_state_constants.c
5580     */
5581 
5582    /* Vertex position scale/translation */
5583    if (emit->vposition.need_prescale) {
5584       emit->vposition.prescale_cbuf_index = total_consts;
5585       total_consts += (2 * emit->vposition.num_prescale);
5586    }
5587 
5588    if (emit->unit == PIPE_SHADER_VERTEX) {
5589       if (emit->key.vs.undo_viewport) {
5590          emit->vs.viewport_index = total_consts++;
5591       }
5592       if (emit->key.vs.need_vertex_id_bias) {
5593          emit->vs.vertex_id_bias_index = total_consts++;
5594       }
5595    }
5596 
5597    /* user-defined clip planes */
5598    if (emit->key.clip_plane_enable) {
5599       unsigned n = util_bitcount(emit->key.clip_plane_enable);
5600       assert(emit->unit != PIPE_SHADER_FRAGMENT &&
5601              emit->unit != PIPE_SHADER_COMPUTE);
5602       for (i = 0; i < n; i++) {
5603          emit->clip_plane_const[i] = total_consts++;
5604       }
5605    }
5606 
5607    for (i = 0; i < emit->num_samplers; i++) {
5608 
5609       if (emit->key.tex[i].sampler_view) {
5610          /* Texcoord scale factors for RECT textures */
5611          if (emit->key.tex[i].unnormalized) {
5612             emit->texcoord_scale_index[i] = total_consts++;
5613          }
5614 
5615          /* Texture buffer sizes */
5616          if (emit->key.tex[i].target == PIPE_BUFFER) {
5617             emit->texture_buffer_size_index[i] = total_consts++;
5618          }
5619       }
5620    }
5621 
5622    if (total_consts > 0) {
5623       begin_emit_instruction(emit);
5624       emit_dword(emit, opcode0.value);
5625       emit_dword(emit, operand0.value);
5626       emit_dword(emit, 0);  /* which const buffer slot */
5627       emit_dword(emit, total_consts);
5628       end_emit_instruction(emit);
5629    }
5630 
5631    /* Declare remaining constant buffers (UBOs) */
5632    for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
5633       if (emit->num_shader_consts[i] > 0) {
5634          begin_emit_instruction(emit);
5635          emit_dword(emit, opcode0.value);
5636          emit_dword(emit, operand0.value);
5637          emit_dword(emit, i);  /* which const buffer slot */
5638          emit_dword(emit, emit->num_shader_consts[i]);
5639          end_emit_instruction(emit);
5640       }
5641    }
5642 
5643    return TRUE;
5644 }
5645 
5646 
5647 /**
5648  * Emit declarations for samplers.
5649  */
5650 static boolean
emit_sampler_declarations(struct svga_shader_emitter_v10 * emit)5651 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
5652 {
5653    unsigned i;
5654 
5655    for (i = 0; i < emit->num_samplers; i++) {
5656       VGPU10OpcodeToken0 opcode0;
5657       VGPU10OperandToken0 operand0;
5658 
5659       opcode0.value = 0;
5660       opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
5661       opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
5662 
5663       operand0.value = 0;
5664       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5665       operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
5666       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5667       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5668 
5669       begin_emit_instruction(emit);
5670       emit_dword(emit, opcode0.value);
5671       emit_dword(emit, operand0.value);
5672       emit_dword(emit, i);
5673       end_emit_instruction(emit);
5674    }
5675 
5676    return TRUE;
5677 }
5678 
5679 
5680 /**
5681  * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
5682  */
5683 static unsigned
pipe_texture_to_resource_dimension(enum tgsi_texture_type target,unsigned num_samples,boolean is_array)5684 pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
5685                                    unsigned num_samples,
5686                                    boolean is_array)
5687 {
5688    switch (target) {
5689    case PIPE_BUFFER:
5690       return VGPU10_RESOURCE_DIMENSION_BUFFER;
5691    case PIPE_TEXTURE_1D:
5692       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5693    case PIPE_TEXTURE_2D:
5694       return num_samples > 2 ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS :
5695          VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5696    case PIPE_TEXTURE_RECT:
5697       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5698    case PIPE_TEXTURE_3D:
5699       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
5700    case PIPE_TEXTURE_CUBE:
5701       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5702    case PIPE_TEXTURE_1D_ARRAY:
5703       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
5704          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5705    case PIPE_TEXTURE_2D_ARRAY:
5706       if (num_samples > 2 && is_array)
5707          return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY;
5708       else if (is_array)
5709          return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
5710       else
5711          return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5712    case PIPE_TEXTURE_CUBE_ARRAY:
5713          return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
5714                          VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5715    default:
5716       assert(!"Unexpected resource type");
5717       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5718    }
5719 }
5720 
5721 
5722 /**
5723  * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
5724  */
5725 static unsigned
tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,unsigned num_samples,boolean is_array)5726 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
5727                                    unsigned num_samples,
5728                                    boolean is_array)
5729 {
5730    if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
5731       target = TGSI_TEXTURE_2D;
5732    }
5733    else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
5734       target = TGSI_TEXTURE_2D_ARRAY;
5735    }
5736 
5737    switch (target) {
5738    case TGSI_TEXTURE_BUFFER:
5739       return VGPU10_RESOURCE_DIMENSION_BUFFER;
5740    case TGSI_TEXTURE_1D:
5741       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5742    case TGSI_TEXTURE_2D:
5743    case TGSI_TEXTURE_RECT:
5744       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5745    case TGSI_TEXTURE_3D:
5746       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
5747    case TGSI_TEXTURE_CUBE:
5748    case TGSI_TEXTURE_SHADOWCUBE:
5749       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5750    case TGSI_TEXTURE_SHADOW1D:
5751       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5752    case TGSI_TEXTURE_SHADOW2D:
5753    case TGSI_TEXTURE_SHADOWRECT:
5754       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5755    case TGSI_TEXTURE_1D_ARRAY:
5756    case TGSI_TEXTURE_SHADOW1D_ARRAY:
5757       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
5758          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5759    case TGSI_TEXTURE_2D_ARRAY:
5760    case TGSI_TEXTURE_SHADOW2D_ARRAY:
5761       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
5762          : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5763    case TGSI_TEXTURE_2D_MSAA:
5764       return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5765    case TGSI_TEXTURE_2D_ARRAY_MSAA:
5766       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
5767          : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5768    case TGSI_TEXTURE_CUBE_ARRAY:
5769    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
5770       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
5771          : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5772    default:
5773       assert(!"Unexpected resource type");
5774       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5775    }
5776 }
5777 
5778 
5779 /**
5780  * Given a tgsi_return_type, return true iff it is an integer type.
5781  */
5782 static boolean
is_integer_type(enum tgsi_return_type type)5783 is_integer_type(enum tgsi_return_type type)
5784 {
5785    switch (type) {
5786       case TGSI_RETURN_TYPE_SINT:
5787       case TGSI_RETURN_TYPE_UINT:
5788          return TRUE;
5789       case TGSI_RETURN_TYPE_FLOAT:
5790       case TGSI_RETURN_TYPE_UNORM:
5791       case TGSI_RETURN_TYPE_SNORM:
5792          return FALSE;
5793       case TGSI_RETURN_TYPE_COUNT:
5794       default:
5795          assert(!"is_integer_type: Unknown tgsi_return_type");
5796          return FALSE;
5797    }
5798 }
5799 
5800 
5801 /**
5802  * Emit declarations for resources.
5803  * XXX When we're sure that all TGSI shaders will be generated with
5804  * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
5805  * rework this code.
5806  */
5807 static boolean
emit_resource_declarations(struct svga_shader_emitter_v10 * emit)5808 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
5809 {
5810    unsigned i;
5811 
5812    /* Emit resource decl for each sampler */
5813    for (i = 0; i < emit->num_samplers; i++) {
5814       VGPU10OpcodeToken0 opcode0;
5815       VGPU10OperandToken0 operand0;
5816       VGPU10ResourceReturnTypeToken return_type;
5817       VGPU10_RESOURCE_RETURN_TYPE rt;
5818 
5819       opcode0.value = 0;
5820       opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
5821       if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
5822          opcode0.resourceDimension =
5823             tgsi_texture_to_resource_dimension(emit->sampler_target[i],
5824                                                emit->key.tex[i].num_samples,
5825                                                emit->key.tex[i].is_array);
5826       }
5827       else {
5828          opcode0.resourceDimension =
5829             pipe_texture_to_resource_dimension(emit->key.tex[i].target,
5830                                                emit->key.tex[i].num_samples,
5831                                                emit->key.tex[i].is_array);
5832       }
5833       opcode0.sampleCount = emit->key.tex[i].num_samples;
5834       operand0.value = 0;
5835       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5836       operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5837       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5838       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5839 
5840 #if 1
5841       /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
5842       STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
5843       STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
5844       STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
5845       STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
5846       STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
5847       assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
5848       if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
5849          rt = emit->sampler_return_type[i] + 1;
5850       }
5851       else {
5852          rt = emit->key.tex[i].sampler_return_type;
5853       }
5854 #else
5855       switch (emit->sampler_return_type[i]) {
5856          case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
5857          case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
5858          case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
5859          case TGSI_RETURN_TYPE_UINT:  rt = VGPU10_RETURN_TYPE_UINT;  break;
5860          case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
5861          case TGSI_RETURN_TYPE_COUNT:
5862          default:
5863             rt = VGPU10_RETURN_TYPE_FLOAT;
5864             assert(!"emit_resource_declarations: Unknown tgsi_return_type");
5865       }
5866 #endif
5867 
5868       return_type.value = 0;
5869       return_type.component0 = rt;
5870       return_type.component1 = rt;
5871       return_type.component2 = rt;
5872       return_type.component3 = rt;
5873 
5874       begin_emit_instruction(emit);
5875       emit_dword(emit, opcode0.value);
5876       emit_dword(emit, operand0.value);
5877       emit_dword(emit, i);
5878       emit_dword(emit, return_type.value);
5879       end_emit_instruction(emit);
5880    }
5881 
5882    return TRUE;
5883 }
5884 
5885 /**
5886  * Emit instruction with n=1, 2 or 3 source registers.
5887  */
5888 static void
emit_instruction_opn(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3,boolean saturate,bool precise)5889 emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
5890                      unsigned opcode,
5891                      const struct tgsi_full_dst_register *dst,
5892                      const struct tgsi_full_src_register *src1,
5893                      const struct tgsi_full_src_register *src2,
5894                      const struct tgsi_full_src_register *src3,
5895                      boolean saturate, bool precise)
5896 {
5897    begin_emit_instruction(emit);
5898    emit_opcode_precise(emit, opcode, saturate, precise);
5899    emit_dst_register(emit, dst);
5900    emit_src_register(emit, src1);
5901    if (src2) {
5902       emit_src_register(emit, src2);
5903    }
5904    if (src3) {
5905       emit_src_register(emit, src3);
5906    }
5907    end_emit_instruction(emit);
5908 }
5909 
5910 static void
emit_instruction_op1(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)5911 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
5912                      unsigned opcode,
5913                      const struct tgsi_full_dst_register *dst,
5914                      const struct tgsi_full_src_register *src)
5915 {
5916    emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE);
5917 }
5918 
5919 static void
emit_instruction_op2(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2)5920 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
5921                      VGPU10_OPCODE_TYPE opcode,
5922                      const struct tgsi_full_dst_register *dst,
5923                      const struct tgsi_full_src_register *src1,
5924                      const struct tgsi_full_src_register *src2)
5925 {
5926    emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE);
5927 }
5928 
5929 static void
emit_instruction_op3(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3)5930 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
5931                      VGPU10_OPCODE_TYPE opcode,
5932                      const struct tgsi_full_dst_register *dst,
5933                      const struct tgsi_full_src_register *src1,
5934                      const struct tgsi_full_src_register *src2,
5935                      const struct tgsi_full_src_register *src3)
5936 {
5937    emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE);
5938 }
5939 
5940 static void
emit_instruction_op0(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode)5941 emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
5942                      VGPU10_OPCODE_TYPE opcode)
5943 {
5944    begin_emit_instruction(emit);
5945    emit_opcode(emit, opcode, FALSE);
5946    end_emit_instruction(emit);
5947 }
5948 
5949 /**
5950  * Tessellation inner/outer levels needs to be store into its
5951  * appropriate registers depending on prim_mode.
5952  */
5953 static void
store_tesslevels(struct svga_shader_emitter_v10 * emit)5954 store_tesslevels(struct svga_shader_emitter_v10 *emit)
5955 {
5956    int i;
5957 
5958    /* tessellation levels are required input/out in hull shader.
5959     * emitting the inner/outer tessellation levels, either from
5960     * values provided in tcs or fallback default values which is 1.0
5961     */
5962    if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
5963       struct tgsi_full_src_register temp_src;
5964 
5965       if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5966          temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5967       else
5968          temp_src = make_immediate_reg_float(emit, 1.0f);
5969 
5970       for (i = 0; i < 2; i++) {
5971          struct tgsi_full_src_register src =
5972             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5973          struct tgsi_full_dst_register dst =
5974             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
5975          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5976          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5977       }
5978 
5979       if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
5980          temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
5981       else
5982          temp_src = make_immediate_reg_float(emit, 1.0f);
5983 
5984       for (i = 0; i < 4; i++) {
5985          struct tgsi_full_src_register src =
5986             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5987          struct tgsi_full_dst_register dst =
5988             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
5989          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5990          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5991       }
5992    }
5993    else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
5994       struct tgsi_full_src_register temp_src;
5995 
5996       if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5997          temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5998       else
5999          temp_src = make_immediate_reg_float(emit, 1.0f);
6000 
6001       struct tgsi_full_src_register src =
6002          scalar_src(&temp_src, TGSI_SWIZZLE_X);
6003       struct tgsi_full_dst_register dst =
6004          make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
6005       dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6006       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6007 
6008       if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6009          temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6010       else
6011          temp_src = make_immediate_reg_float(emit, 1.0f);
6012 
6013       for (i = 0; i < 3; i++) {
6014          struct tgsi_full_src_register src =
6015             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6016          struct tgsi_full_dst_register dst =
6017             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6018          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6019          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6020       }
6021    }
6022    else if (emit->key.tcs.prim_mode ==  PIPE_PRIM_LINES) {
6023       if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
6024          struct tgsi_full_src_register temp_src =
6025             make_src_temp_reg(emit->tcs.outer.temp_index);
6026          for (i = 0; i < 2; i++) {
6027             struct tgsi_full_src_register src =
6028                scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6029             struct tgsi_full_dst_register dst =
6030                make_dst_reg(TGSI_FILE_OUTPUT,
6031                             emit->tcs.outer.out_index + i);
6032             dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6033             emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6034          }
6035       }
6036    }
6037    else {
6038       debug_printf("Unsupported primitive type");
6039    }
6040 }
6041 
6042 
6043 /**
6044  * Emit the actual clip distance instructions to be used for clipping
6045  * by copying the clip distance from the temporary registers to the
6046  * CLIPDIST registers written with the enabled planes mask.
6047  * Also copy the clip distance from the temporary to the clip distance
6048  * shadow copy register which will be referenced by the input shader
6049  */
6050 static void
emit_clip_distance_instructions(struct svga_shader_emitter_v10 * emit)6051 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
6052 {
6053    struct tgsi_full_src_register tmp_clip_dist_src;
6054    struct tgsi_full_dst_register clip_dist_dst;
6055 
6056    unsigned i;
6057    unsigned clip_plane_enable = emit->key.clip_plane_enable;
6058    unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6059    int num_written_clipdist = emit->info.num_written_clipdistance;
6060 
6061    assert(emit->clip_dist_out_index != INVALID_INDEX);
6062    assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6063 
6064    /**
6065     * Temporary reset the temporary clip dist register index so
6066     * that the copy to the real clip dist register will not
6067     * attempt to copy to the temporary register again
6068     */
6069    emit->clip_dist_tmp_index = INVALID_INDEX;
6070 
6071    for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6072 
6073       tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6074 
6075       /**
6076        * copy to the shadow copy for use by varying variable and
6077        * stream output. All clip distances
6078        * will be written regardless of the enabled clipping planes.
6079        */
6080       clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6081                                    emit->clip_dist_so_index + i);
6082 
6083       /* MOV clip_dist_so, tmp_clip_dist */
6084       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6085                            &tmp_clip_dist_src);
6086 
6087       /**
6088        * copy those clip distances to enabled clipping planes
6089        * to CLIPDIST registers for clipping
6090        */
6091       if (clip_plane_enable & 0xf) {
6092          clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6093                                       emit->clip_dist_out_index + i);
6094          clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6095 
6096          /* MOV CLIPDIST, tmp_clip_dist */
6097          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6098                               &tmp_clip_dist_src);
6099       }
6100       /* four clip planes per clip register */
6101       clip_plane_enable >>= 4;
6102    }
6103    /**
6104     * set the temporary clip dist register index back to the
6105     * temporary index for the next vertex
6106     */
6107    emit->clip_dist_tmp_index = clip_dist_tmp_index;
6108 }
6109 
6110 /* Declare clip distance output registers for user-defined clip planes
6111  * or the TGSI_CLIPVERTEX output.
6112  */
6113 static void
emit_clip_distance_declarations(struct svga_shader_emitter_v10 * emit)6114 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6115 {
6116    unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6117    unsigned index = emit->num_outputs;
6118    unsigned plane_mask;
6119 
6120    assert(emit->unit != PIPE_SHADER_FRAGMENT);
6121    assert(num_clip_planes <= 8);
6122 
6123    if (emit->clip_mode != CLIP_LEGACY &&
6124        emit->clip_mode != CLIP_VERTEX) {
6125       return;
6126    }
6127 
6128    if (num_clip_planes == 0)
6129       return;
6130 
6131    /* Convert clip vertex to clip distances only in the last vertex stage */
6132    if (!emit->key.last_vertex_stage)
6133       return;
6134 
6135    /* Declare one or two clip output registers.  The number of components
6136     * in the mask reflects the number of clip planes.  For example, if 5
6137     * clip planes are needed, we'll declare outputs similar to:
6138     * dcl_output_siv o2.xyzw, clip_distance
6139     * dcl_output_siv o3.x, clip_distance
6140     */
6141    emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6142 
6143    plane_mask = (1 << num_clip_planes) - 1;
6144    if (plane_mask & 0xf) {
6145       unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6146       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6147                               VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6148                               SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6149       emit->num_outputs++;
6150    }
6151    if (plane_mask & 0xf0) {
6152       unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6153       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6154                               VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6155                               SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6156       emit->num_outputs++;
6157    }
6158 }
6159 
6160 
6161 /**
6162  * Emit the instructions for writing to the clip distance registers
6163  * to handle legacy/automatic clip planes.
6164  * For each clip plane, the distance is the dot product of the vertex
6165  * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6166  * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
6167  * output registers already declared.
6168  */
6169 static void
emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 * emit,unsigned vpos_tmp_index)6170 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6171                              unsigned vpos_tmp_index)
6172 {
6173    unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6174 
6175    assert(emit->clip_mode == CLIP_LEGACY);
6176    assert(num_clip_planes <= 8);
6177 
6178    assert(emit->unit == PIPE_SHADER_VERTEX ||
6179           emit->unit == PIPE_SHADER_GEOMETRY ||
6180           emit->unit == PIPE_SHADER_TESS_EVAL);
6181 
6182    for (i = 0; i < num_clip_planes; i++) {
6183       struct tgsi_full_dst_register dst;
6184       struct tgsi_full_src_register plane_src, vpos_src;
6185       unsigned reg_index = emit->clip_dist_out_index + i / 4;
6186       unsigned comp = i % 4;
6187       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6188 
6189       /* create dst, src regs */
6190       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6191       dst = writemask_dst(&dst, writemask);
6192 
6193       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6194       vpos_src = make_src_temp_reg(vpos_tmp_index);
6195 
6196       /* DP4 clip_dist, plane, vpos */
6197       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6198                            &plane_src, &vpos_src);
6199    }
6200 }
6201 
6202 
6203 /**
6204  * Emit the instructions for computing the clip distance results from
6205  * the clip vertex temporary.
6206  * For each clip plane, the distance is the dot product of the clip vertex
6207  * position (found in a temp reg) and the clip plane coefficients.
6208  */
6209 static void
emit_clip_vertex_instructions(struct svga_shader_emitter_v10 * emit)6210 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6211 {
6212    const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6213    unsigned i;
6214    struct tgsi_full_dst_register dst;
6215    struct tgsi_full_src_register clipvert_src;
6216    const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6217 
6218    assert(emit->unit == PIPE_SHADER_VERTEX ||
6219           emit->unit == PIPE_SHADER_GEOMETRY ||
6220           emit->unit == PIPE_SHADER_TESS_EVAL);
6221 
6222    assert(emit->clip_mode == CLIP_VERTEX);
6223 
6224    clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6225 
6226    for (i = 0; i < num_clip; i++) {
6227       struct tgsi_full_src_register plane_src;
6228       unsigned reg_index = emit->clip_dist_out_index + i / 4;
6229       unsigned comp = i % 4;
6230       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6231 
6232       /* create dst, src regs */
6233       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6234       dst = writemask_dst(&dst, writemask);
6235 
6236       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6237 
6238       /* DP4 clip_dist, plane, vpos */
6239       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6240                            &plane_src, &clipvert_src);
6241    }
6242 
6243    /* copy temporary clip vertex register to the clip vertex register */
6244 
6245    assert(emit->clip_vertex_out_index != INVALID_INDEX);
6246 
6247    /**
6248     * temporary reset the temporary clip vertex register index so
6249     * that copy to the clip vertex register will not attempt
6250     * to copy to the temporary register again
6251     */
6252    emit->clip_vertex_tmp_index = INVALID_INDEX;
6253 
6254    /* MOV clip_vertex, clip_vertex_tmp */
6255    dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6256    emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6257                         &dst, &clipvert_src);
6258 
6259    /**
6260     * set the temporary clip vertex register index back to the
6261     * temporary index for the next vertex
6262     */
6263    emit->clip_vertex_tmp_index = clip_vertex_tmp;
6264 }
6265 
6266 /**
6267  * Emit code to convert RGBA to BGRA
6268  */
6269 static void
emit_swap_r_b(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6270 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6271                      const struct tgsi_full_dst_register *dst,
6272                      const struct tgsi_full_src_register *src)
6273 {
6274    struct tgsi_full_src_register bgra_src =
6275       swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6276 
6277    begin_emit_instruction(emit);
6278    emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
6279    emit_dst_register(emit, dst);
6280    emit_src_register(emit, &bgra_src);
6281    end_emit_instruction(emit);
6282 }
6283 
6284 
6285 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6286 static void
emit_puint_to_snorm(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6287 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6288                     const struct tgsi_full_dst_register *dst,
6289                     const struct tgsi_full_src_register *src)
6290 {
6291    struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6292    struct tgsi_full_src_register two =
6293       make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6294    struct tgsi_full_src_register neg_two =
6295       make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6296 
6297    unsigned val_tmp = get_temp_index(emit);
6298    struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6299    struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6300 
6301    unsigned bias_tmp = get_temp_index(emit);
6302    struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6303    struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6304 
6305    /* val = src * 2.0 */
6306    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6307 
6308    /* bias = src > 0.5 */
6309    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6310 
6311    /* bias = bias & -2.0 */
6312    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6313                         &bias_src, &neg_two);
6314 
6315    /* dst = val + bias */
6316    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
6317                         &val_src, &bias_src);
6318 
6319    free_temp_indexes(emit);
6320 }
6321 
6322 
6323 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
6324 static void
emit_puint_to_uscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6325 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
6326                       const struct tgsi_full_dst_register *dst,
6327                       const struct tgsi_full_src_register *src)
6328 {
6329    struct tgsi_full_src_register scale =
6330       make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
6331 
6332    /* dst = src * scale */
6333    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
6334 }
6335 
6336 
6337 /** Convert from R32_UINT to 10_10_10_2_sscaled */
6338 static void
emit_puint_to_sscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6339 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
6340                       const struct tgsi_full_dst_register *dst,
6341                       const struct tgsi_full_src_register *src)
6342 {
6343    struct tgsi_full_src_register lshift =
6344       make_immediate_reg_int4(emit, 22, 12, 2, 0);
6345    struct tgsi_full_src_register rshift =
6346       make_immediate_reg_int4(emit, 22, 22, 22, 30);
6347 
6348    struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
6349 
6350    unsigned tmp = get_temp_index(emit);
6351    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6352    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6353 
6354    /*
6355     * r = (pixel << 22) >> 22;   # signed int in [511, -512]
6356     * g = (pixel << 12) >> 22;   # signed int in [511, -512]
6357     * b = (pixel <<  2) >> 22;   # signed int in [511, -512]
6358     * a = (pixel <<  0) >> 30;   # signed int in [1, -2]
6359     * dst = i_to_f(r,g,b,a);     # convert to float
6360     */
6361    emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
6362                         &src_xxxx, &lshift);
6363    emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
6364                         &tmp_src, &rshift);
6365    emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
6366 
6367    free_temp_indexes(emit);
6368 }
6369 
6370 
6371 /**
6372  * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
6373  */
6374 static boolean
emit_arl_uarl(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6375 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
6376               const struct tgsi_full_instruction *inst)
6377 {
6378    unsigned index = inst->Dst[0].Register.Index;
6379    struct tgsi_full_dst_register dst;
6380    VGPU10_OPCODE_TYPE opcode;
6381 
6382    assert(index < MAX_VGPU10_ADDR_REGS);
6383    dst = make_dst_temp_reg(emit->address_reg_index[index]);
6384    dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
6385 
6386    /* ARL dst, s0
6387     * Translates into:
6388     * FTOI address_tmp, s0
6389     *
6390     * UARL dst, s0
6391     * Translates into:
6392     * MOV address_tmp, s0
6393     */
6394    if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
6395       opcode = VGPU10_OPCODE_FTOI;
6396    else
6397       opcode = VGPU10_OPCODE_MOV;
6398 
6399    emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
6400 
6401    return TRUE;
6402 }
6403 
6404 
6405 /**
6406  * Emit code for TGSI_OPCODE_CAL instruction.
6407  */
6408 static boolean
emit_cal(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6409 emit_cal(struct svga_shader_emitter_v10 *emit,
6410          const struct tgsi_full_instruction *inst)
6411 {
6412    unsigned label = inst->Label.Label;
6413    VGPU10OperandToken0 operand;
6414    operand.value = 0;
6415    operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
6416 
6417    begin_emit_instruction(emit);
6418    emit_dword(emit, operand.value);
6419    emit_dword(emit, label);
6420    end_emit_instruction(emit);
6421 
6422    return TRUE;
6423 }
6424 
6425 
6426 /**
6427  * Emit code for TGSI_OPCODE_IABS instruction.
6428  */
6429 static boolean
emit_iabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6430 emit_iabs(struct svga_shader_emitter_v10 *emit,
6431           const struct tgsi_full_instruction *inst)
6432 {
6433    /* dst.x = (src0.x < 0) ? -src0.x : src0.x
6434     * dst.y = (src0.y < 0) ? -src0.y : src0.y
6435     * dst.z = (src0.z < 0) ? -src0.z : src0.z
6436     * dst.w = (src0.w < 0) ? -src0.w : src0.w
6437     *
6438     * Translates into
6439     *   IMAX dst, src, neg(src)
6440     */
6441    struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
6442    emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
6443                         &inst->Src[0], &neg_src);
6444 
6445    return TRUE;
6446 }
6447 
6448 
6449 /**
6450  * Emit code for TGSI_OPCODE_CMP instruction.
6451  */
6452 static boolean
emit_cmp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6453 emit_cmp(struct svga_shader_emitter_v10 *emit,
6454          const struct tgsi_full_instruction *inst)
6455 {
6456    /* dst.x = (src0.x < 0) ? src1.x : src2.x
6457     * dst.y = (src0.y < 0) ? src1.y : src2.y
6458     * dst.z = (src0.z < 0) ? src1.z : src2.z
6459     * dst.w = (src0.w < 0) ? src1.w : src2.w
6460     *
6461     * Translates into
6462     *   LT tmp, src0, 0.0
6463     *   MOVC dst, tmp, src1, src2
6464     */
6465    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6466    unsigned tmp = get_temp_index(emit);
6467    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6468    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6469 
6470    emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
6471                         &inst->Src[0], &zero, NULL, FALSE,
6472                         inst->Instruction.Precise);
6473    emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
6474                         &tmp_src, &inst->Src[1], &inst->Src[2],
6475                         inst->Instruction.Saturate, FALSE);
6476 
6477    free_temp_indexes(emit);
6478 
6479    return TRUE;
6480 }
6481 
6482 
6483 /**
6484  * Emit code for TGSI_OPCODE_DST instruction.
6485  */
6486 static boolean
emit_dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6487 emit_dst(struct svga_shader_emitter_v10 *emit,
6488          const struct tgsi_full_instruction *inst)
6489 {
6490    /*
6491     * dst.x = 1
6492     * dst.y = src0.y * src1.y
6493     * dst.z = src0.z
6494     * dst.w = src1.w
6495     */
6496 
6497    struct tgsi_full_src_register s0_yyyy =
6498       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6499    struct tgsi_full_src_register s0_zzzz =
6500       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
6501    struct tgsi_full_src_register s1_yyyy =
6502       scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
6503    struct tgsi_full_src_register s1_wwww =
6504       scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
6505 
6506    /*
6507     * If dst and either src0 and src1 are the same we need
6508     * to create a temporary for it and insert a extra move.
6509     */
6510    unsigned tmp_move = get_temp_index(emit);
6511    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6512    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6513 
6514    /* MOV dst.x, 1.0 */
6515    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6516       struct tgsi_full_dst_register dst_x =
6517          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6518       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6519 
6520       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6521    }
6522 
6523    /* MUL dst.y, s0.y, s1.y */
6524    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6525       struct tgsi_full_dst_register dst_y =
6526          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6527 
6528       emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
6529                            &s1_yyyy, NULL, inst->Instruction.Saturate,
6530                            inst->Instruction.Precise);
6531    }
6532 
6533    /* MOV dst.z, s0.z */
6534    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6535       struct tgsi_full_dst_register dst_z =
6536          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6537 
6538       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6539                            &dst_z, &s0_zzzz, NULL, NULL,
6540                            inst->Instruction.Saturate,
6541                            inst->Instruction.Precise);
6542   }
6543 
6544    /* MOV dst.w, s1.w */
6545    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6546       struct tgsi_full_dst_register dst_w =
6547          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6548 
6549       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6550                            &dst_w, &s1_wwww, NULL, NULL,
6551                            inst->Instruction.Saturate,
6552                            inst->Instruction.Precise);
6553    }
6554 
6555    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6556    free_temp_indexes(emit);
6557 
6558    return TRUE;
6559 }
6560 
6561 
6562 /**
6563  * A helper function to return the stream index as specified in
6564  * the immediate register
6565  */
6566 static inline unsigned
find_stream_index(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)6567 find_stream_index(struct svga_shader_emitter_v10 *emit,
6568                   const struct tgsi_full_src_register *src)
6569 {
6570    return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
6571 }
6572 
6573 
6574 /**
6575  * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
6576  */
6577 static boolean
emit_endprim(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6578 emit_endprim(struct svga_shader_emitter_v10 *emit,
6579              const struct tgsi_full_instruction *inst)
6580 {
6581    assert(emit->unit == PIPE_SHADER_GEOMETRY);
6582 
6583    begin_emit_instruction(emit);
6584    if (emit->version >= 50) {
6585       unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
6586 
6587       if (emit->info.num_stream_output_components[streamIndex] == 0) {
6588          /**
6589           * If there is no output for this stream, discard this instruction.
6590           */
6591          emit->discard_instruction = TRUE;
6592       }
6593       else {
6594          emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE);
6595          assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
6596          emit_stream_register(emit, streamIndex);
6597       }
6598    }
6599    else {
6600       emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
6601    }
6602    end_emit_instruction(emit);
6603    return TRUE;
6604 }
6605 
6606 
6607 /**
6608  * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
6609  */
6610 static boolean
emit_ex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6611 emit_ex2(struct svga_shader_emitter_v10 *emit,
6612          const struct tgsi_full_instruction *inst)
6613 {
6614    /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
6615     * while VGPU10 computes four values.
6616     *
6617     * dst = EX2(src):
6618     *   dst.xyzw = 2.0 ^ src.x
6619     */
6620 
6621    struct tgsi_full_src_register src_xxxx =
6622       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6623                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6624 
6625    /* EXP tmp, s0.xxxx */
6626    emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
6627                         NULL, NULL,
6628                         inst->Instruction.Saturate,
6629                         inst->Instruction.Precise);
6630 
6631    return TRUE;
6632 }
6633 
6634 
6635 /**
6636  * Emit code for TGSI_OPCODE_EXP instruction.
6637  */
6638 static boolean
emit_exp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6639 emit_exp(struct svga_shader_emitter_v10 *emit,
6640          const struct tgsi_full_instruction *inst)
6641 {
6642    /*
6643     * dst.x = 2 ^ floor(s0.x)
6644     * dst.y = s0.x - floor(s0.x)
6645     * dst.z = 2 ^ s0.x
6646     * dst.w = 1.0
6647     */
6648 
6649    struct tgsi_full_src_register src_xxxx =
6650       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6651    unsigned tmp = get_temp_index(emit);
6652    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6653    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6654 
6655    /*
6656     * If dst and src are the same we need to create
6657     * a temporary for it and insert a extra move.
6658     */
6659    unsigned tmp_move = get_temp_index(emit);
6660    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6661    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6662 
6663    /* only use X component of temp reg */
6664    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6665    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6666 
6667    /* ROUND_NI tmp.x, s0.x */
6668    emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
6669                         &src_xxxx); /* round to -infinity */
6670 
6671    /* EXP dst.x, tmp.x */
6672    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6673       struct tgsi_full_dst_register dst_x =
6674          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6675 
6676       emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
6677                            NULL, NULL,
6678                            inst->Instruction.Saturate,
6679                            inst->Instruction.Precise);
6680    }
6681 
6682    /* ADD dst.y, s0.x, -tmp */
6683    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6684       struct tgsi_full_dst_register dst_y =
6685          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6686       struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
6687 
6688       emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
6689                            &neg_tmp_src, NULL,
6690                            inst->Instruction.Saturate,
6691                            inst->Instruction.Precise);
6692    }
6693 
6694    /* EXP dst.z, s0.x */
6695    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6696       struct tgsi_full_dst_register dst_z =
6697          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6698 
6699       emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
6700                            NULL, NULL,
6701                            inst->Instruction.Saturate,
6702                            inst->Instruction.Precise);
6703    }
6704 
6705    /* MOV dst.w, 1.0 */
6706    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6707       struct tgsi_full_dst_register dst_w =
6708          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6709       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6710 
6711       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6712    }
6713 
6714    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6715 
6716    free_temp_indexes(emit);
6717 
6718    return TRUE;
6719 }
6720 
6721 
6722 /**
6723  * Emit code for TGSI_OPCODE_IF instruction.
6724  */
6725 static boolean
emit_if(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)6726 emit_if(struct svga_shader_emitter_v10 *emit,
6727         const struct tgsi_full_src_register *src)
6728 {
6729    VGPU10OpcodeToken0 opcode0;
6730 
6731    /* The src register should be a scalar */
6732    assert(src->Register.SwizzleX == src->Register.SwizzleY &&
6733           src->Register.SwizzleX == src->Register.SwizzleZ &&
6734           src->Register.SwizzleX == src->Register.SwizzleW);
6735 
6736    /* The only special thing here is that we need to set the
6737     * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
6738     * src.x is non-zero.
6739     */
6740    opcode0.value = 0;
6741    opcode0.opcodeType = VGPU10_OPCODE_IF;
6742    opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
6743 
6744    begin_emit_instruction(emit);
6745    emit_dword(emit, opcode0.value);
6746    emit_src_register(emit, src);
6747    end_emit_instruction(emit);
6748 
6749    return TRUE;
6750 }
6751 
6752 
6753 /**
6754  * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
6755  * the register components are negative).
6756  */
6757 static boolean
emit_kill_if(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6758 emit_kill_if(struct svga_shader_emitter_v10 *emit,
6759              const struct tgsi_full_instruction *inst)
6760 {
6761    unsigned tmp = get_temp_index(emit);
6762    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6763    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6764 
6765    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6766 
6767    struct tgsi_full_dst_register tmp_dst_x =
6768       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6769    struct tgsi_full_src_register tmp_src_xxxx =
6770       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6771 
6772    /* tmp = src[0] < 0.0 */
6773    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
6774 
6775    if (!same_swizzle_terms(&inst->Src[0])) {
6776       /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
6777        * logically OR the swizzle terms.  Most uses of KILL_IF only
6778        * test one channel so it's good to avoid these extra steps.
6779        */
6780       struct tgsi_full_src_register tmp_src_yyyy =
6781          scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
6782       struct tgsi_full_src_register tmp_src_zzzz =
6783          scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
6784       struct tgsi_full_src_register tmp_src_wwww =
6785          scalar_src(&tmp_src, TGSI_SWIZZLE_W);
6786 
6787       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6788                            &tmp_src_yyyy);
6789       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6790                            &tmp_src_zzzz);
6791       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6792                            &tmp_src_wwww);
6793    }
6794 
6795    begin_emit_instruction(emit);
6796    emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */
6797    emit_src_register(emit, &tmp_src_xxxx);
6798    end_emit_instruction(emit);
6799 
6800    free_temp_indexes(emit);
6801 
6802    return TRUE;
6803 }
6804 
6805 
6806 /**
6807  * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
6808  */
6809 static boolean
emit_kill(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6810 emit_kill(struct svga_shader_emitter_v10 *emit,
6811           const struct tgsi_full_instruction *inst)
6812 {
6813    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6814 
6815    /* DISCARD if 0.0 is zero */
6816    begin_emit_instruction(emit);
6817    emit_discard_opcode(emit, FALSE);
6818    emit_src_register(emit, &zero);
6819    end_emit_instruction(emit);
6820 
6821    return TRUE;
6822 }
6823 
6824 
6825 /**
6826  * Emit code for TGSI_OPCODE_LG2 instruction.
6827  */
6828 static boolean
emit_lg2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6829 emit_lg2(struct svga_shader_emitter_v10 *emit,
6830          const struct tgsi_full_instruction *inst)
6831 {
6832    /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
6833     * while VGPU10 computes four values.
6834     *
6835     * dst = LG2(src):
6836     *   dst.xyzw = log2(src.x)
6837     */
6838 
6839    struct tgsi_full_src_register src_xxxx =
6840       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6841                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6842 
6843    /* LOG tmp, s0.xxxx */
6844    emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
6845                         &inst->Dst[0], &src_xxxx, NULL, NULL,
6846                         inst->Instruction.Saturate,
6847                         inst->Instruction.Precise);
6848 
6849    return TRUE;
6850 }
6851 
6852 
6853 /**
6854  * Emit code for TGSI_OPCODE_LIT instruction.
6855  */
6856 static boolean
emit_lit(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6857 emit_lit(struct svga_shader_emitter_v10 *emit,
6858          const struct tgsi_full_instruction *inst)
6859 {
6860    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6861 
6862    /*
6863     * If dst and src are the same we need to create
6864     * a temporary for it and insert a extra move.
6865     */
6866    unsigned tmp_move = get_temp_index(emit);
6867    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6868    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6869 
6870    /*
6871     * dst.x = 1
6872     * dst.y = max(src.x, 0)
6873     * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
6874     * dst.w = 1
6875     */
6876 
6877    /* MOV dst.x, 1.0 */
6878    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6879       struct tgsi_full_dst_register dst_x =
6880          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6881       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6882    }
6883 
6884    /* MOV dst.w, 1.0 */
6885    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6886       struct tgsi_full_dst_register dst_w =
6887          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6888       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6889    }
6890 
6891    /* MAX dst.y, src.x, 0.0 */
6892    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6893       struct tgsi_full_dst_register dst_y =
6894          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6895       struct tgsi_full_src_register zero =
6896          make_immediate_reg_float(emit, 0.0f);
6897       struct tgsi_full_src_register src_xxxx =
6898          swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6899                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6900 
6901       emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
6902                            &zero, NULL, inst->Instruction.Saturate, FALSE);
6903    }
6904 
6905    /*
6906     * tmp1 = clamp(src.w, -128, 128);
6907     *   MAX tmp1, src.w, -128
6908     *   MIN tmp1, tmp1, 128
6909     *
6910     * tmp2 = max(tmp2, 0);
6911     *   MAX tmp2, src.y, 0
6912     *
6913     * tmp1 = pow(tmp2, tmp1);
6914     *   LOG tmp2, tmp2
6915     *   MUL tmp1, tmp2, tmp1
6916     *   EXP tmp1, tmp1
6917     *
6918     * tmp1 = (src.w == 0) ? 1 : tmp1;
6919     *   EQ tmp2, 0, src.w
6920     *   MOVC tmp1, tmp2, 1.0, tmp1
6921     *
6922     * dst.z = (0 < src.x) ? tmp1 : 0;
6923     *   LT tmp2, 0, src.x
6924     *   MOVC dst.z, tmp2, tmp1, 0.0
6925     */
6926    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6927       struct tgsi_full_dst_register dst_z =
6928          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6929 
6930       unsigned tmp1 = get_temp_index(emit);
6931       struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
6932       struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
6933       unsigned tmp2 = get_temp_index(emit);
6934       struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
6935       struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
6936 
6937       struct tgsi_full_src_register src_xxxx =
6938          scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6939       struct tgsi_full_src_register src_yyyy =
6940          scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6941       struct tgsi_full_src_register src_wwww =
6942          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
6943 
6944       struct tgsi_full_src_register zero =
6945          make_immediate_reg_float(emit, 0.0f);
6946       struct tgsi_full_src_register lowerbound =
6947          make_immediate_reg_float(emit, -128.0f);
6948       struct tgsi_full_src_register upperbound =
6949          make_immediate_reg_float(emit, 128.0f);
6950 
6951       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
6952                            &lowerbound);
6953       emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
6954                            &upperbound);
6955       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
6956                            &zero);
6957 
6958       /* POW tmp1, tmp2, tmp1 */
6959       /* LOG tmp2, tmp2 */
6960       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
6961 
6962       /* MUL tmp1, tmp2, tmp1 */
6963       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
6964                            &tmp1_src);
6965 
6966       /* EXP tmp1, tmp1 */
6967       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
6968 
6969       /* EQ tmp2, 0, src.w */
6970       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
6971       /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
6972       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
6973                            &tmp2_src, &one, &tmp1_src);
6974 
6975       /* LT tmp2, 0, src.x */
6976       emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
6977       /* MOVC dst.z, tmp2, tmp1, 0.0 */
6978       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
6979                            &tmp2_src, &tmp1_src, &zero);
6980    }
6981 
6982    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6983    free_temp_indexes(emit);
6984 
6985    return TRUE;
6986 }
6987 
6988 
6989 /**
6990  * Emit Level Of Detail Query (LODQ) instruction.
6991  */
6992 static boolean
emit_lodq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6993 emit_lodq(struct svga_shader_emitter_v10 *emit,
6994           const struct tgsi_full_instruction *inst)
6995 {
6996    const uint unit = inst->Src[1].Register.Index;
6997 
6998    assert(emit->version >= 41);
6999 
7000    /* LOD dst, coord, resource, sampler */
7001    begin_emit_instruction(emit);
7002    emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE);
7003    emit_dst_register(emit, &inst->Dst[0]);
7004    emit_src_register(emit, &inst->Src[0]); /* coord */
7005    emit_resource_register(emit, unit);
7006    emit_sampler_register(emit, unit);
7007    end_emit_instruction(emit);
7008 
7009    return TRUE;
7010 }
7011 
7012 
7013 /**
7014  * Emit code for TGSI_OPCODE_LOG instruction.
7015  */
7016 static boolean
emit_log(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7017 emit_log(struct svga_shader_emitter_v10 *emit,
7018          const struct tgsi_full_instruction *inst)
7019 {
7020    /*
7021     * dst.x = floor(lg2(abs(s0.x)))
7022     * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
7023     * dst.z = lg2(abs(s0.x))
7024     * dst.w = 1.0
7025     */
7026 
7027    struct tgsi_full_src_register src_xxxx =
7028       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7029    unsigned tmp = get_temp_index(emit);
7030    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7031    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7032    struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
7033 
7034    /* only use X component of temp reg */
7035    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7036    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7037 
7038    /* LOG tmp.x, abs(s0.x) */
7039    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
7040       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
7041    }
7042 
7043    /* MOV dst.z, tmp.x */
7044    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7045       struct tgsi_full_dst_register dst_z =
7046          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
7047 
7048       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7049                            &dst_z, &tmp_src, NULL, NULL,
7050                            inst->Instruction.Saturate, FALSE);
7051    }
7052 
7053    /* FLR tmp.x, tmp.x */
7054    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7055       emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7056    }
7057 
7058    /* MOV dst.x, tmp.x */
7059    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7060       struct tgsi_full_dst_register dst_x =
7061          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7062 
7063       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7064                            &dst_x, &tmp_src, NULL, NULL,
7065                            inst->Instruction.Saturate, FALSE);
7066    }
7067 
7068    /* EXP tmp.x, tmp.x */
7069    /* DIV dst.y, abs(s0.x), tmp.x */
7070    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7071       struct tgsi_full_dst_register dst_y =
7072          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7073 
7074       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7075       emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7076                            &tmp_src, NULL, inst->Instruction.Saturate, FALSE);
7077    }
7078 
7079    /* MOV dst.w, 1.0 */
7080    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7081       struct tgsi_full_dst_register dst_w =
7082          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7083       struct tgsi_full_src_register one =
7084          make_immediate_reg_float(emit, 1.0f);
7085 
7086       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7087    }
7088 
7089    free_temp_indexes(emit);
7090 
7091    return TRUE;
7092 }
7093 
7094 
7095 /**
7096  * Emit code for TGSI_OPCODE_LRP instruction.
7097  */
7098 static boolean
emit_lrp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7099 emit_lrp(struct svga_shader_emitter_v10 *emit,
7100          const struct tgsi_full_instruction *inst)
7101 {
7102    /* dst = LRP(s0, s1, s2):
7103     *   dst = s0 * (s1 - s2) + s2
7104     * Translates into:
7105     *   SUB tmp, s1, s2;        tmp = s1 - s2
7106     *   MAD dst, s0, tmp, s2;   dst = s0 * t1 + s2
7107     */
7108    unsigned tmp = get_temp_index(emit);
7109    struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7110    struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7111    struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7112 
7113    /* ADD tmp, s1, -s2 */
7114    emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7115                         &inst->Src[1], &neg_src2, NULL, FALSE,
7116                         inst->Instruction.Precise);
7117 
7118    /* MAD dst, s1, tmp, s3 */
7119    emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7120                         &inst->Src[0], &src_tmp, &inst->Src[2],
7121                         inst->Instruction.Saturate,
7122                         inst->Instruction.Precise);
7123 
7124    free_temp_indexes(emit);
7125 
7126    return TRUE;
7127 }
7128 
7129 
7130 /**
7131  * Emit code for TGSI_OPCODE_POW instruction.
7132  */
7133 static boolean
emit_pow(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7134 emit_pow(struct svga_shader_emitter_v10 *emit,
7135          const struct tgsi_full_instruction *inst)
7136 {
7137    /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7138     * src1.x while VGPU10 computes four values.
7139     *
7140     * dst = POW(src0, src1):
7141     *   dst.xyzw = src0.x ^ src1.x
7142     */
7143    unsigned tmp = get_temp_index(emit);
7144    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7145    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7146    struct tgsi_full_src_register src0_xxxx =
7147       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7148                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7149    struct tgsi_full_src_register src1_xxxx =
7150       swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7151                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7152 
7153    /* LOG tmp, s0.xxxx */
7154    emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7155                         &tmp_dst, &src0_xxxx, NULL, NULL,
7156                         FALSE, inst->Instruction.Precise);
7157 
7158    /* MUL tmp, tmp, s1.xxxx */
7159    emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7160                         &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7161                         FALSE, inst->Instruction.Precise);
7162 
7163    /* EXP tmp, s0.xxxx */
7164    emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7165                         &inst->Dst[0], &tmp_src, NULL, NULL,
7166                         inst->Instruction.Saturate,
7167                         inst->Instruction.Precise);
7168 
7169    /* free tmp */
7170    free_temp_indexes(emit);
7171 
7172    return TRUE;
7173 }
7174 
7175 
7176 /**
7177  * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7178  */
7179 static boolean
emit_rcp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7180 emit_rcp(struct svga_shader_emitter_v10 *emit,
7181          const struct tgsi_full_instruction *inst)
7182 {
7183    if (emit->version >= 50) {
7184       /* use new RCP instruction.  But VGPU10_OPCODE_RCP is component-wise
7185        * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need
7186        * to manipulate the src register's swizzle.
7187        */
7188       struct tgsi_full_src_register src = inst->Src[0];
7189       src.Register.SwizzleY =
7190       src.Register.SwizzleZ =
7191       src.Register.SwizzleW = src.Register.SwizzleX;
7192 
7193       begin_emit_instruction(emit);
7194       emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7195                           inst->Instruction.Saturate,
7196                           inst->Instruction.Precise);
7197       emit_dst_register(emit, &inst->Dst[0]);
7198       emit_src_register(emit, &src);
7199       end_emit_instruction(emit);
7200    }
7201    else {
7202       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7203 
7204       unsigned tmp = get_temp_index(emit);
7205       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7206       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7207 
7208       struct tgsi_full_dst_register tmp_dst_x =
7209          writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7210       struct tgsi_full_src_register tmp_src_xxxx =
7211          scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7212 
7213       /* DIV tmp.x, 1.0, s0 */
7214       emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7215                            &tmp_dst_x, &one, &inst->Src[0], NULL,
7216                            FALSE, inst->Instruction.Precise);
7217 
7218       /* MOV dst, tmp.xxxx */
7219       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7220                            &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7221                            inst->Instruction.Saturate,
7222                            inst->Instruction.Precise);
7223 
7224       free_temp_indexes(emit);
7225    }
7226 
7227    return TRUE;
7228 }
7229 
7230 
7231 /**
7232  * Emit code for TGSI_OPCODE_RSQ instruction.
7233  */
7234 static boolean
emit_rsq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7235 emit_rsq(struct svga_shader_emitter_v10 *emit,
7236          const struct tgsi_full_instruction *inst)
7237 {
7238    /* dst = RSQ(src):
7239     *   dst.xyzw = 1 / sqrt(src.x)
7240     * Translates into:
7241     *   RSQ tmp, src.x
7242     *   MOV dst, tmp.xxxx
7243     */
7244 
7245    unsigned tmp = get_temp_index(emit);
7246    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7247    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7248 
7249    struct tgsi_full_dst_register tmp_dst_x =
7250       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7251    struct tgsi_full_src_register tmp_src_xxxx =
7252       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7253 
7254    /* RSQ tmp, src.x */
7255    emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7256                         &tmp_dst_x, &inst->Src[0], NULL, NULL,
7257                         FALSE, inst->Instruction.Precise);
7258 
7259    /* MOV dst, tmp.xxxx */
7260    emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7261                         &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7262                         inst->Instruction.Saturate,
7263                         inst->Instruction.Precise);
7264 
7265    /* free tmp */
7266    free_temp_indexes(emit);
7267 
7268    return TRUE;
7269 }
7270 
7271 
7272 /**
7273  * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7274  */
7275 static boolean
emit_seq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7276 emit_seq(struct svga_shader_emitter_v10 *emit,
7277          const struct tgsi_full_instruction *inst)
7278 {
7279    /* dst = SEQ(s0, s1):
7280     *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
7281     * Translates into:
7282     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7283     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7284     */
7285    unsigned tmp = get_temp_index(emit);
7286    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7287    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7288    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7289    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7290 
7291    /* EQ tmp, s0, s1 */
7292    emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7293                         &inst->Src[1]);
7294 
7295    /* MOVC dst, tmp, one, zero */
7296    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7297                         &one, &zero);
7298 
7299    free_temp_indexes(emit);
7300 
7301    return TRUE;
7302 }
7303 
7304 
7305 /**
7306  * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7307  */
7308 static boolean
emit_sge(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7309 emit_sge(struct svga_shader_emitter_v10 *emit,
7310          const struct tgsi_full_instruction *inst)
7311 {
7312    /* dst = SGE(s0, s1):
7313     *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
7314     * Translates into:
7315     *   GE tmp, s0, s1;           tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
7316     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7317     */
7318    unsigned tmp = get_temp_index(emit);
7319    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7320    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7321    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7322    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7323 
7324    /* GE tmp, s0, s1 */
7325    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
7326                         &inst->Src[1]);
7327 
7328    /* MOVC dst, tmp, one, zero */
7329    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7330                         &one, &zero);
7331 
7332    free_temp_indexes(emit);
7333 
7334    return TRUE;
7335 }
7336 
7337 
7338 /**
7339  * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
7340  */
7341 static boolean
emit_sgt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7342 emit_sgt(struct svga_shader_emitter_v10 *emit,
7343          const struct tgsi_full_instruction *inst)
7344 {
7345    /* dst = SGT(s0, s1):
7346     *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
7347     * Translates into:
7348     *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
7349     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7350     */
7351    unsigned tmp = get_temp_index(emit);
7352    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7353    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7354    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7355    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7356 
7357    /* LT tmp, s1, s0 */
7358    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
7359                         &inst->Src[0]);
7360 
7361    /* MOVC dst, tmp, one, zero */
7362    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7363                         &one, &zero);
7364 
7365    free_temp_indexes(emit);
7366 
7367    return TRUE;
7368 }
7369 
7370 
7371 /**
7372  * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
7373  */
7374 static boolean
emit_sincos(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7375 emit_sincos(struct svga_shader_emitter_v10 *emit,
7376          const struct tgsi_full_instruction *inst)
7377 {
7378    unsigned tmp = get_temp_index(emit);
7379    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7380    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7381 
7382    struct tgsi_full_src_register tmp_src_xxxx =
7383       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7384    struct tgsi_full_dst_register tmp_dst_x =
7385       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7386 
7387    begin_emit_instruction(emit);
7388    emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
7389 
7390    if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
7391    {
7392       emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
7393       emit_null_dst_register(emit);  /* second destination register */
7394    }
7395    else {
7396       emit_null_dst_register(emit);
7397       emit_dst_register(emit, &tmp_dst_x);
7398    }
7399 
7400    emit_src_register(emit, &inst->Src[0]);
7401    end_emit_instruction(emit);
7402 
7403    emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7404                         &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7405                         inst->Instruction.Saturate,
7406                         inst->Instruction.Precise);
7407 
7408    free_temp_indexes(emit);
7409 
7410    return TRUE;
7411 }
7412 
7413 
7414 /**
7415  * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
7416  */
7417 static boolean
emit_sle(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7418 emit_sle(struct svga_shader_emitter_v10 *emit,
7419          const struct tgsi_full_instruction *inst)
7420 {
7421    /* dst = SLE(s0, s1):
7422     *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
7423     * Translates into:
7424     *   GE tmp, s1, s0;           tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
7425     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7426     */
7427    unsigned tmp = get_temp_index(emit);
7428    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7429    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7430    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7431    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7432 
7433    /* GE tmp, s1, s0 */
7434    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
7435                         &inst->Src[0]);
7436 
7437    /* MOVC dst, tmp, one, zero */
7438    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7439                         &one, &zero);
7440 
7441    free_temp_indexes(emit);
7442 
7443    return TRUE;
7444 }
7445 
7446 
7447 /**
7448  * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
7449  */
7450 static boolean
emit_slt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7451 emit_slt(struct svga_shader_emitter_v10 *emit,
7452          const struct tgsi_full_instruction *inst)
7453 {
7454    /* dst = SLT(s0, s1):
7455     *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
7456     * Translates into:
7457     *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
7458     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7459     */
7460    unsigned tmp = get_temp_index(emit);
7461    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7462    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7463    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7464    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7465 
7466    /* LT tmp, s0, s1 */
7467    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
7468                         &inst->Src[1]);
7469 
7470    /* MOVC dst, tmp, one, zero */
7471    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7472                         &one, &zero);
7473 
7474    free_temp_indexes(emit);
7475 
7476    return TRUE;
7477 }
7478 
7479 
7480 /**
7481  * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
7482  */
7483 static boolean
emit_sne(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7484 emit_sne(struct svga_shader_emitter_v10 *emit,
7485          const struct tgsi_full_instruction *inst)
7486 {
7487    /* dst = SNE(s0, s1):
7488     *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
7489     * Translates into:
7490     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7491     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7492     */
7493    unsigned tmp = get_temp_index(emit);
7494    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7495    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7496    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7497    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7498 
7499    /* NE tmp, s0, s1 */
7500    emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
7501                         &inst->Src[1]);
7502 
7503    /* MOVC dst, tmp, one, zero */
7504    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7505                         &one, &zero);
7506 
7507    free_temp_indexes(emit);
7508 
7509    return TRUE;
7510 }
7511 
7512 
7513 /**
7514  * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
7515  */
7516 static boolean
emit_ssg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7517 emit_ssg(struct svga_shader_emitter_v10 *emit,
7518          const struct tgsi_full_instruction *inst)
7519 {
7520    /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
7521     * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
7522     * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
7523     * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
7524     * Translates into:
7525     *   LT tmp1, src, zero;           tmp1 = src < zero ? 0xffffffff : 0 (per comp)
7526     *   MOVC tmp2, tmp1, -1.0, 0.0;   tmp2 = tmp1 ? -1.0 : 0.0 (per component)
7527     *   LT tmp1, zero, src;           tmp1 = zero < src ? 0xffffffff : 0 (per comp)
7528     *   MOVC dst, tmp1, 1.0, tmp2;    dst = tmp1 ? 1.0 : tmp2 (per component)
7529     */
7530    struct tgsi_full_src_register zero =
7531       make_immediate_reg_float(emit, 0.0f);
7532    struct tgsi_full_src_register one =
7533       make_immediate_reg_float(emit, 1.0f);
7534    struct tgsi_full_src_register neg_one =
7535       make_immediate_reg_float(emit, -1.0f);
7536 
7537    unsigned tmp1 = get_temp_index(emit);
7538    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7539    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7540 
7541    unsigned tmp2 = get_temp_index(emit);
7542    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7543    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7544 
7545    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
7546                         &zero);
7547    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
7548                         &neg_one, &zero);
7549    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
7550                         &inst->Src[0]);
7551    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
7552                         &one, &tmp2_src);
7553 
7554    free_temp_indexes(emit);
7555 
7556    return TRUE;
7557 }
7558 
7559 
7560 /**
7561  * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
7562  */
7563 static boolean
emit_issg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7564 emit_issg(struct svga_shader_emitter_v10 *emit,
7565           const struct tgsi_full_instruction *inst)
7566 {
7567    /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
7568     * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
7569     * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
7570     * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
7571     * Translates into:
7572     *   ILT tmp1, src, 0              tmp1 = src < 0 ? -1 : 0 (per component)
7573     *   ILT tmp2, 0, src              tmp2 = 0 < src ? -1 : 0 (per component)
7574     *   IADD dst, tmp1, neg(tmp2)     dst  = tmp1 - tmp2      (per component)
7575     */
7576    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7577 
7578    unsigned tmp1 = get_temp_index(emit);
7579    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7580    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7581 
7582    unsigned tmp2 = get_temp_index(emit);
7583    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7584    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7585 
7586    struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
7587 
7588    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
7589                         &inst->Src[0], &zero);
7590    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
7591                         &zero, &inst->Src[0]);
7592    emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
7593                         &tmp1_src, &neg_tmp2);
7594 
7595    free_temp_indexes(emit);
7596 
7597    return TRUE;
7598 }
7599 
7600 
7601 /**
7602  * Emit a comparison instruction.  The dest register will get
7603  * 0 or ~0 values depending on the outcome of comparing src0 to src1.
7604  */
7605 static void
emit_comparison(struct svga_shader_emitter_v10 * emit,SVGA3dCmpFunc func,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src0,const struct tgsi_full_src_register * src1)7606 emit_comparison(struct svga_shader_emitter_v10 *emit,
7607                 SVGA3dCmpFunc func,
7608                 const struct tgsi_full_dst_register *dst,
7609                 const struct tgsi_full_src_register *src0,
7610                 const struct tgsi_full_src_register *src1)
7611 {
7612    struct tgsi_full_src_register immediate;
7613    VGPU10OpcodeToken0 opcode0;
7614    boolean swapSrc = FALSE;
7615 
7616    /* Sanity checks for svga vs. gallium enums */
7617    STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
7618    STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
7619 
7620    opcode0.value = 0;
7621 
7622    switch (func) {
7623    case SVGA3D_CMP_NEVER:
7624       immediate = make_immediate_reg_int(emit, 0);
7625       /* MOV dst, {0} */
7626       begin_emit_instruction(emit);
7627       emit_dword(emit, VGPU10_OPCODE_MOV);
7628       emit_dst_register(emit, dst);
7629       emit_src_register(emit, &immediate);
7630       end_emit_instruction(emit);
7631       return;
7632    case SVGA3D_CMP_ALWAYS:
7633       immediate = make_immediate_reg_int(emit, -1);
7634       /* MOV dst, {-1} */
7635       begin_emit_instruction(emit);
7636       emit_dword(emit, VGPU10_OPCODE_MOV);
7637       emit_dst_register(emit, dst);
7638       emit_src_register(emit, &immediate);
7639       end_emit_instruction(emit);
7640       return;
7641    case SVGA3D_CMP_LESS:
7642       opcode0.opcodeType = VGPU10_OPCODE_LT;
7643       break;
7644    case SVGA3D_CMP_EQUAL:
7645       opcode0.opcodeType = VGPU10_OPCODE_EQ;
7646       break;
7647    case SVGA3D_CMP_LESSEQUAL:
7648       opcode0.opcodeType = VGPU10_OPCODE_GE;
7649       swapSrc = TRUE;
7650       break;
7651    case SVGA3D_CMP_GREATER:
7652       opcode0.opcodeType = VGPU10_OPCODE_LT;
7653       swapSrc = TRUE;
7654       break;
7655    case SVGA3D_CMP_NOTEQUAL:
7656       opcode0.opcodeType = VGPU10_OPCODE_NE;
7657       break;
7658    case SVGA3D_CMP_GREATEREQUAL:
7659       opcode0.opcodeType = VGPU10_OPCODE_GE;
7660       break;
7661    default:
7662       assert(!"Unexpected comparison mode");
7663       opcode0.opcodeType = VGPU10_OPCODE_EQ;
7664    }
7665 
7666    begin_emit_instruction(emit);
7667    emit_dword(emit, opcode0.value);
7668    emit_dst_register(emit, dst);
7669    if (swapSrc) {
7670       emit_src_register(emit, src1);
7671       emit_src_register(emit, src0);
7672    }
7673    else {
7674       emit_src_register(emit, src0);
7675       emit_src_register(emit, src1);
7676    }
7677    end_emit_instruction(emit);
7678 }
7679 
7680 
7681 /**
7682  * Get texel/address offsets for a texture instruction.
7683  */
7684 static void
get_texel_offsets(const struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,int offsets[3])7685 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
7686                   const struct tgsi_full_instruction *inst, int offsets[3])
7687 {
7688    if (inst->Texture.NumOffsets == 1) {
7689       /* According to OpenGL Shader Language spec the offsets are only
7690        * fetched from a previously-declared immediate/literal.
7691        */
7692       const struct tgsi_texture_offset *off = inst->TexOffsets;
7693       const unsigned index = off[0].Index;
7694       const unsigned swizzleX = off[0].SwizzleX;
7695       const unsigned swizzleY = off[0].SwizzleY;
7696       const unsigned swizzleZ = off[0].SwizzleZ;
7697       const union tgsi_immediate_data *imm = emit->immediates[index];
7698 
7699       assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
7700 
7701       offsets[0] = imm[swizzleX].Int;
7702       offsets[1] = imm[swizzleY].Int;
7703       offsets[2] = imm[swizzleZ].Int;
7704    }
7705    else {
7706       offsets[0] = offsets[1] = offsets[2] = 0;
7707    }
7708 }
7709 
7710 
7711 /**
7712  * Set up the coordinate register for texture sampling.
7713  * When we're sampling from a RECT texture we have to scale the
7714  * unnormalized coordinate to a normalized coordinate.
7715  * We do that by multiplying the coordinate by an "extra" constant.
7716  * An alternative would be to use the RESINFO instruction to query the
7717  * texture's size.
7718  */
7719 static struct tgsi_full_src_register
setup_texcoord(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_src_register * coord)7720 setup_texcoord(struct svga_shader_emitter_v10 *emit,
7721                unsigned unit,
7722                const struct tgsi_full_src_register *coord)
7723 {
7724    if (emit->key.tex[unit].sampler_view && emit->key.tex[unit].unnormalized) {
7725       unsigned scale_index = emit->texcoord_scale_index[unit];
7726       unsigned tmp = get_temp_index(emit);
7727       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7728       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7729       struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
7730 
7731       if (emit->key.tex[unit].texel_bias) {
7732          /* to fix texture coordinate rounding issue, 0.0001 offset is
7733           * been added. This fixes piglit test fbo-blit-scaled-linear. */
7734          struct tgsi_full_src_register offset =
7735             make_immediate_reg_float(emit, 0.0001f);
7736 
7737          /* ADD tmp, coord, offset */
7738          emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
7739                               coord, &offset);
7740          /* MUL tmp, tmp, scale */
7741          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7742                               &tmp_src, &scale_src);
7743       }
7744       else {
7745          /* MUL tmp, coord, const[] */
7746          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7747                               coord, &scale_src);
7748       }
7749       return tmp_src;
7750    }
7751    else {
7752       /* use texcoord as-is */
7753       return *coord;
7754    }
7755 }
7756 
7757 
7758 /**
7759  * For SAMPLE_C instructions, emit the extra src register which indicates
7760  * the reference/comparision value.
7761  */
7762 static void
emit_tex_compare_refcoord(struct svga_shader_emitter_v10 * emit,enum tgsi_texture_type target,const struct tgsi_full_src_register * coord)7763 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
7764                           enum tgsi_texture_type target,
7765                           const struct tgsi_full_src_register *coord)
7766 {
7767    struct tgsi_full_src_register coord_src_ref;
7768    int component;
7769 
7770    assert(tgsi_is_shadow_target(target));
7771 
7772    component = tgsi_util_get_shadow_ref_src_index(target) % 4;
7773    assert(component >= 0);
7774 
7775    coord_src_ref = scalar_src(coord, component);
7776 
7777    emit_src_register(emit, &coord_src_ref);
7778 }
7779 
7780 
/**
 * Info for implementing texture swizzles.
 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
 * functions use this to encapsulate the extra steps needed to perform
 * a texture swizzle, or shadow/depth comparisons.
 * The shadow/depth comparison is only done here for the cases where
 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
 */
struct tex_swizzle_info
{
   /** TRUE if the unit's swizzle in the shader key is not the identity XYZW */
   boolean swizzled;
   /** TRUE if end_tex_swizzle() must emit the shadow/depth comparison */
   boolean shadow_compare;
   /** texture unit; begin_tex_swizzle() only sets this when a temporary
    * is in use (swizzled or shadow_compare)
    */
   unsigned unit;
   enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
   /** src view of the temporary holding the raw sample result; only valid
    * when swizzled or shadow_compare is set
    */
   struct tgsi_full_src_register tmp_src;
   /** dst view of the same temporary register */
   struct tgsi_full_dst_register tmp_dst;
   /** the TGSI instruction's destination register (Dst[0]) */
   const struct tgsi_full_dst_register *inst_dst;
   /** the TGSI instruction's coordinate register (Src[0]) */
   const struct tgsi_full_src_register *coord_src;
};
7800 
7801 
7802 /**
7803  * Do setup for handling texture swizzles or shadow compares.
7804  * \param unit  the texture unit
7805  * \param inst  the TGSI texture instruction
7806  * \param shadow_compare  do shadow/depth comparison?
7807  * \param swz  returns the swizzle info
7808  */
7809 static void
begin_tex_swizzle(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_instruction * inst,boolean shadow_compare,struct tex_swizzle_info * swz)7810 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7811                   unsigned unit,
7812                   const struct tgsi_full_instruction *inst,
7813                   boolean shadow_compare,
7814                   struct tex_swizzle_info *swz)
7815 {
7816    swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
7817                     emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
7818                     emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
7819                     emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
7820 
7821    swz->shadow_compare = shadow_compare;
7822    swz->texture_target = inst->Texture.Texture;
7823 
7824    if (swz->swizzled || shadow_compare) {
7825       /* Allocate temp register for the result of the SAMPLE instruction
7826        * and the source of the MOV/compare/swizzle instructions.
7827        */
7828       unsigned tmp = get_temp_index(emit);
7829       swz->tmp_src = make_src_temp_reg(tmp);
7830       swz->tmp_dst = make_dst_temp_reg(tmp);
7831 
7832       swz->unit = unit;
7833    }
7834    swz->inst_dst = &inst->Dst[0];
7835    swz->coord_src = &inst->Src[0];
7836 
7837    emit->fs.shadow_compare_units |= shadow_compare << unit;
7838 }
7839 
7840 
7841 /**
7842  * Returns the register to put the SAMPLE instruction results into.
7843  * This will either be the original instruction dst reg (if no swizzle
7844  * and no shadow comparison) or a temporary reg if there is a swizzle.
7845  */
7846 static const struct tgsi_full_dst_register *
get_tex_swizzle_dst(const struct tex_swizzle_info * swz)7847 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
7848 {
7849    return (swz->swizzled || swz->shadow_compare)
7850       ? &swz->tmp_dst : swz->inst_dst;
7851 }
7852 
7853 
7854 /**
7855  * This emits the MOV instruction that actually implements a texture swizzle
7856  * and/or shadow comparison.
7857  */
7858 static void
end_tex_swizzle(struct svga_shader_emitter_v10 * emit,const struct tex_swizzle_info * swz)7859 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7860                 const struct tex_swizzle_info *swz)
7861 {
7862    if (swz->shadow_compare) {
7863       /* Emit extra instructions to compare the fetched texel value against
7864        * a texture coordinate component.  The result of the comparison
7865        * is 0.0 or 1.0.
7866        */
7867       struct tgsi_full_src_register coord_src;
7868       struct tgsi_full_src_register texel_src =
7869          scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
7870       struct tgsi_full_src_register one =
7871          make_immediate_reg_float(emit, 1.0f);
7872       /* convert gallium comparison func to SVGA comparison func */
7873       SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
7874 
7875       int component =
7876          tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
7877       assert(component >= 0);
7878       coord_src = scalar_src(swz->coord_src, component);
7879 
7880       /* COMPARE tmp, coord, texel */
7881       emit_comparison(emit, compare_func,
7882                       &swz->tmp_dst, &coord_src, &texel_src);
7883 
7884       /* AND dest, tmp, {1.0} */
7885       begin_emit_instruction(emit);
7886       emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
7887       if (swz->swizzled) {
7888          emit_dst_register(emit, &swz->tmp_dst);
7889       }
7890       else {
7891          emit_dst_register(emit, swz->inst_dst);
7892       }
7893       emit_src_register(emit, &swz->tmp_src);
7894       emit_src_register(emit, &one);
7895       end_emit_instruction(emit);
7896    }
7897 
7898    if (swz->swizzled) {
7899       unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
7900       unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
7901       unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
7902       unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
7903       unsigned writemask_0 = 0, writemask_1 = 0;
7904       boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
7905 
7906       /* Swizzle w/out zero/one terms */
7907       struct tgsi_full_src_register src_swizzled =
7908          swizzle_src(&swz->tmp_src,
7909                      swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
7910                      swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
7911                      swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
7912                      swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
7913 
7914       /* MOV dst, color(tmp).<swizzle> */
7915       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
7916                            swz->inst_dst, &src_swizzled);
7917 
7918       /* handle swizzle zero terms */
7919       writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
7920                      ((swz_g == PIPE_SWIZZLE_0) << 1) |
7921                      ((swz_b == PIPE_SWIZZLE_0) << 2) |
7922                      ((swz_a == PIPE_SWIZZLE_0) << 3));
7923       writemask_0 &= swz->inst_dst->Register.WriteMask;
7924 
7925       if (writemask_0) {
7926          struct tgsi_full_src_register zero = int_tex ?
7927             make_immediate_reg_int(emit, 0) :
7928             make_immediate_reg_float(emit, 0.0f);
7929          struct tgsi_full_dst_register dst =
7930             writemask_dst(swz->inst_dst, writemask_0);
7931 
7932          /* MOV dst.writemask_0, {0,0,0,0} */
7933          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
7934       }
7935 
7936       /* handle swizzle one terms */
7937       writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
7938                      ((swz_g == PIPE_SWIZZLE_1) << 1) |
7939                      ((swz_b == PIPE_SWIZZLE_1) << 2) |
7940                      ((swz_a == PIPE_SWIZZLE_1) << 3));
7941       writemask_1 &= swz->inst_dst->Register.WriteMask;
7942 
7943       if (writemask_1) {
7944          struct tgsi_full_src_register one = int_tex ?
7945             make_immediate_reg_int(emit, 1) :
7946             make_immediate_reg_float(emit, 1.0f);
7947          struct tgsi_full_dst_register dst =
7948             writemask_dst(swz->inst_dst, writemask_1);
7949 
7950          /* MOV dst.writemask_1, {1,1,1,1} */
7951          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
7952       }
7953    }
7954 }
7955 
7956 
7957 /**
7958  * Emit code for TGSI_OPCODE_SAMPLE instruction.
7959  */
7960 static boolean
emit_sample(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7961 emit_sample(struct svga_shader_emitter_v10 *emit,
7962             const struct tgsi_full_instruction *inst)
7963 {
7964    const unsigned resource_unit = inst->Src[1].Register.Index;
7965    const unsigned sampler_unit = inst->Src[2].Register.Index;
7966    struct tgsi_full_src_register coord;
7967    int offsets[3];
7968    struct tex_swizzle_info swz_info;
7969 
7970    begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
7971 
7972    get_texel_offsets(emit, inst, offsets);
7973 
7974    coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
7975 
7976    /* SAMPLE dst, coord(s0), resource, sampler */
7977    begin_emit_instruction(emit);
7978 
7979    /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
7980     * with LOD=0.  But our virtual GPU accepts this as-is.
7981     */
7982    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
7983                       inst->Instruction.Saturate, offsets);
7984    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
7985    emit_src_register(emit, &coord);
7986    emit_resource_register(emit, resource_unit);
7987    emit_sampler_register(emit, sampler_unit);
7988    end_emit_instruction(emit);
7989 
7990    end_tex_swizzle(emit, &swz_info);
7991 
7992    free_temp_indexes(emit);
7993 
7994    return TRUE;
7995 }
7996 
7997 
7998 /**
7999  * Check if a texture instruction is valid.
8000  * An example of an invalid texture instruction is doing shadow comparison
8001  * with an integer-valued texture.
8002  * If we detect an invalid texture instruction, we replace it with:
8003  *   MOV dst, {1,1,1,1};
8004  * \return TRUE if valid, FALSE if invalid.
8005  */
8006 static boolean
is_valid_tex_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8007 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
8008                          const struct tgsi_full_instruction *inst)
8009 {
8010    const unsigned unit = inst->Src[1].Register.Index;
8011    const enum tgsi_texture_type target = inst->Texture.Texture;
8012    boolean valid = TRUE;
8013 
8014    if (tgsi_is_shadow_target(target) &&
8015        is_integer_type(emit->sampler_return_type[unit])) {
8016       debug_printf("Invalid SAMPLE_C with an integer texture!\n");
8017       valid = FALSE;
8018    }
8019    /* XXX might check for other conditions in the future here */
8020 
8021    if (!valid) {
8022       /* emit a MOV dst, {1,1,1,1} instruction. */
8023       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8024       begin_emit_instruction(emit);
8025       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
8026       emit_dst_register(emit, &inst->Dst[0]);
8027       emit_src_register(emit, &one);
8028       end_emit_instruction(emit);
8029    }
8030 
8031    return valid;
8032 }
8033 
8034 
8035 /**
8036  * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
8037  */
8038 static boolean
emit_tex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8039 emit_tex(struct svga_shader_emitter_v10 *emit,
8040          const struct tgsi_full_instruction *inst)
8041 {
8042    const uint unit = inst->Src[1].Register.Index;
8043    const enum tgsi_texture_type target = inst->Texture.Texture;
8044    VGPU10_OPCODE_TYPE opcode;
8045    struct tgsi_full_src_register coord;
8046    int offsets[3];
8047    struct tex_swizzle_info swz_info;
8048 
8049    /* check that the sampler returns a float */
8050    if (!is_valid_tex_instruction(emit, inst))
8051       return TRUE;
8052 
8053    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8054 
8055    get_texel_offsets(emit, inst, offsets);
8056 
8057    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8058 
8059    /* SAMPLE dst, coord(s0), resource, sampler */
8060    begin_emit_instruction(emit);
8061 
8062    if (tgsi_is_shadow_target(target))
8063       opcode = VGPU10_OPCODE_SAMPLE_C;
8064    else
8065       opcode = VGPU10_OPCODE_SAMPLE;
8066 
8067    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8068    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8069    emit_src_register(emit, &coord);
8070    emit_resource_register(emit, unit);
8071    emit_sampler_register(emit, unit);
8072    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8073       emit_tex_compare_refcoord(emit, target, &coord);
8074    }
8075    end_emit_instruction(emit);
8076 
8077    end_tex_swizzle(emit, &swz_info);
8078 
8079    free_temp_indexes(emit);
8080 
8081    return TRUE;
8082 }
8083 
8084 /**
8085  * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8086  */
8087 static boolean
emit_tg4(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8088 emit_tg4(struct svga_shader_emitter_v10 *emit,
8089          const struct tgsi_full_instruction *inst)
8090 {
8091    const uint unit = inst->Src[2].Register.Index;
8092    struct tgsi_full_src_register src;
8093    struct tgsi_full_src_register offset_src, sampler, ref;
8094    int offsets[3];
8095 
8096    /* check that the sampler returns a float */
8097    if (!is_valid_tex_instruction(emit, inst))
8098       return TRUE;
8099 
8100    if (emit->version >= 50) {
8101       unsigned target = inst->Texture.Texture;
8102       int index = inst->Src[1].Register.Index;
8103       const union tgsi_immediate_data *imm = emit->immediates[index];
8104       int select_comp  = imm[inst->Src[1].Register.SwizzleX].Int;
8105       unsigned select_swizzle = PIPE_SWIZZLE_X;
8106 
8107       if (!tgsi_is_shadow_target(target)) {
8108          switch (select_comp) {
8109          case 0:
8110             select_swizzle = emit->key.tex[unit].swizzle_r;
8111             break;
8112          case 1:
8113             select_swizzle = emit->key.tex[unit].swizzle_g;
8114             break;
8115          case 2:
8116             select_swizzle = emit->key.tex[unit].swizzle_b;
8117             break;
8118          case 3:
8119             select_swizzle = emit->key.tex[unit].swizzle_a;
8120             break;
8121          default:
8122             assert(!"Unexpected component in texture gather swizzle");
8123          }
8124       }
8125       else {
8126          select_swizzle = emit->key.tex[unit].swizzle_r;
8127       }
8128 
8129       if (select_swizzle == PIPE_SWIZZLE_1) {
8130          src = make_immediate_reg_float(emit, 1.0);
8131          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8132          return TRUE;
8133       }
8134       else if (select_swizzle == PIPE_SWIZZLE_0) {
8135          src = make_immediate_reg_float(emit, 0.0);
8136          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8137          return TRUE;
8138       }
8139 
8140       src = setup_texcoord(emit, unit, &inst->Src[0]);
8141 
8142       /* GATHER4 dst, coord, resource, sampler */
8143       /* GATHER4_C dst, coord, resource, sampler ref */
8144       /* GATHER4_PO dst, coord, offset resource, sampler */
8145       /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */
8146       begin_emit_instruction(emit);
8147       if (inst->Texture.NumOffsets == 1) {
8148          if (tgsi_is_shadow_target(target)) {
8149             emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
8150                         inst->Instruction.Saturate);
8151          }
8152          else {
8153             emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
8154                         inst->Instruction.Saturate);
8155          }
8156       }
8157       else {
8158          if (tgsi_is_shadow_target(target)) {
8159             emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
8160                         inst->Instruction.Saturate);
8161          }
8162          else {
8163             emit_opcode(emit, VGPU10_OPCODE_GATHER4,
8164                         inst->Instruction.Saturate);
8165          }
8166       }
8167 
8168       emit_dst_register(emit, &inst->Dst[0]);
8169       emit_src_register(emit, &src);
8170       if (inst->Texture.NumOffsets == 1) {
8171          /* offset */
8172          offset_src = make_src_reg(inst->TexOffsets[0].File,
8173                                    inst->TexOffsets[0].Index);
8174          offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
8175                                   inst->TexOffsets[0].SwizzleY,
8176                                   inst->TexOffsets[0].SwizzleZ,
8177                                   TGSI_SWIZZLE_W);
8178          emit_src_register(emit, &offset_src);
8179       }
8180 
8181       /* resource */
8182       emit_resource_register(emit, unit);
8183 
8184       /* sampler */
8185       sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8186       sampler.Register.SwizzleX =
8187       sampler.Register.SwizzleY =
8188       sampler.Register.SwizzleZ =
8189       sampler.Register.SwizzleW = select_swizzle;
8190       emit_src_register(emit, &sampler);
8191 
8192       if (tgsi_is_shadow_target(target)) {
8193          /* ref */
8194          if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8195             ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8196             emit_tex_compare_refcoord(emit, target, &ref);
8197          }
8198          else {
8199             emit_tex_compare_refcoord(emit, target, &src);
8200          }
8201       }
8202 
8203       end_emit_instruction(emit);
8204       free_temp_indexes(emit);
8205    }
8206    else {
8207       /* Only a single channel is supported in SM4_1 and we report
8208        * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
8209        * Only the 0th component will be gathered.
8210        */
8211       switch (emit->key.tex[unit].swizzle_r) {
8212       case PIPE_SWIZZLE_X:
8213          get_texel_offsets(emit, inst, offsets);
8214          src = setup_texcoord(emit, unit, &inst->Src[0]);
8215 
8216          /* Gather dst, coord, resource, sampler */
8217          begin_emit_instruction(emit);
8218          emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
8219                             inst->Instruction.Saturate, offsets);
8220          emit_dst_register(emit, &inst->Dst[0]);
8221          emit_src_register(emit, &src);
8222          emit_resource_register(emit, unit);
8223 
8224          /* sampler */
8225          sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8226          sampler.Register.SwizzleX =
8227          sampler.Register.SwizzleY =
8228          sampler.Register.SwizzleZ =
8229          sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
8230          emit_src_register(emit, &sampler);
8231 
8232          end_emit_instruction(emit);
8233          break;
8234       case PIPE_SWIZZLE_W:
8235       case PIPE_SWIZZLE_1:
8236          src = make_immediate_reg_float(emit, 1.0);
8237          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8238          break;
8239       case PIPE_SWIZZLE_Y:
8240       case PIPE_SWIZZLE_Z:
8241       case PIPE_SWIZZLE_0:
8242       default:
8243          src = make_immediate_reg_float(emit, 0.0);
8244          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8245          break;
8246       }
8247    }
8248 
8249    return TRUE;
8250 }
8251 
8252 
8253 
8254 /**
8255  * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
8256  */
8257 static boolean
emit_tex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8258 emit_tex2(struct svga_shader_emitter_v10 *emit,
8259          const struct tgsi_full_instruction *inst)
8260 {
8261    const uint unit = inst->Src[2].Register.Index;
8262    unsigned target = inst->Texture.Texture;
8263    struct tgsi_full_src_register coord, ref;
8264    int offsets[3];
8265    struct tex_swizzle_info swz_info;
8266 
8267    /* check that the sampler returns a float */
8268    if (!is_valid_tex_instruction(emit, inst))
8269       return TRUE;
8270 
8271    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8272 
8273    get_texel_offsets(emit, inst, offsets);
8274 
8275    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8276    ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8277 
8278    /* SAMPLE_C dst, coord, resource, sampler, ref */
8279    begin_emit_instruction(emit);
8280    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C,
8281                       inst->Instruction.Saturate, offsets);
8282    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8283    emit_src_register(emit, &coord);
8284    emit_resource_register(emit, unit);
8285    emit_sampler_register(emit, unit);
8286    emit_tex_compare_refcoord(emit, target, &ref);
8287    end_emit_instruction(emit);
8288 
8289    end_tex_swizzle(emit, &swz_info);
8290 
8291    free_temp_indexes(emit);
8292 
8293    return TRUE;
8294 }
8295 
8296 
8297 /**
8298  * Emit code for TGSI_OPCODE_TXP (projective texture)
8299  */
8300 static boolean
emit_txp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8301 emit_txp(struct svga_shader_emitter_v10 *emit,
8302          const struct tgsi_full_instruction *inst)
8303 {
8304    const uint unit = inst->Src[1].Register.Index;
8305    const enum tgsi_texture_type target = inst->Texture.Texture;
8306    VGPU10_OPCODE_TYPE opcode;
8307    int offsets[3];
8308    unsigned tmp = get_temp_index(emit);
8309    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8310    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8311    struct tgsi_full_src_register src0_wwww =
8312       scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8313    struct tgsi_full_src_register coord;
8314    struct tex_swizzle_info swz_info;
8315 
8316    /* check that the sampler returns a float */
8317    if (!is_valid_tex_instruction(emit, inst))
8318       return TRUE;
8319 
8320    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8321 
8322    get_texel_offsets(emit, inst, offsets);
8323 
8324    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8325 
8326    /* DIV tmp, coord, coord.wwww */
8327    emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
8328                         &coord, &src0_wwww);
8329 
8330    /* SAMPLE dst, coord(tmp), resource, sampler */
8331    begin_emit_instruction(emit);
8332 
8333    if (tgsi_is_shadow_target(target))
8334       /* NOTE: for non-fragment shaders, we should use
8335        * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
8336        */
8337       opcode = VGPU10_OPCODE_SAMPLE_C;
8338    else
8339       opcode = VGPU10_OPCODE_SAMPLE;
8340 
8341    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8342    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8343    emit_src_register(emit, &tmp_src);  /* projected coord */
8344    emit_resource_register(emit, unit);
8345    emit_sampler_register(emit, unit);
8346    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8347       emit_tex_compare_refcoord(emit, target, &tmp_src);
8348    }
8349    end_emit_instruction(emit);
8350 
8351    end_tex_swizzle(emit, &swz_info);
8352 
8353    free_temp_indexes(emit);
8354 
8355    return TRUE;
8356 }
8357 
8358 
8359 /**
8360  * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
8361  */
8362 static boolean
emit_txd(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8363 emit_txd(struct svga_shader_emitter_v10 *emit,
8364          const struct tgsi_full_instruction *inst)
8365 {
8366    const uint unit = inst->Src[3].Register.Index;
8367    const enum tgsi_texture_type target = inst->Texture.Texture;
8368    int offsets[3];
8369    struct tgsi_full_src_register coord;
8370    struct tex_swizzle_info swz_info;
8371 
8372    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8373                      &swz_info);
8374 
8375    get_texel_offsets(emit, inst, offsets);
8376 
8377    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8378 
8379    /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
8380    begin_emit_instruction(emit);
8381    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
8382                       inst->Instruction.Saturate, offsets);
8383    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8384    emit_src_register(emit, &coord);
8385    emit_resource_register(emit, unit);
8386    emit_sampler_register(emit, unit);
8387    emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
8388    emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
8389    end_emit_instruction(emit);
8390 
8391    end_tex_swizzle(emit, &swz_info);
8392 
8393    free_temp_indexes(emit);
8394 
8395    return TRUE;
8396 }
8397 
8398 
8399 /**
8400  * Emit code for TGSI_OPCODE_TXF (texel fetch)
8401  */
8402 static boolean
emit_txf(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8403 emit_txf(struct svga_shader_emitter_v10 *emit,
8404          const struct tgsi_full_instruction *inst)
8405 {
8406    const uint unit = inst->Src[1].Register.Index;
8407    const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture)
8408       && emit->key.tex[unit].num_samples > 1;
8409    int offsets[3];
8410    struct tex_swizzle_info swz_info;
8411 
8412    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8413 
8414    get_texel_offsets(emit, inst, offsets);
8415 
8416    if (msaa) {
8417       assert(emit->key.tex[unit].num_samples > 1);
8418 
8419       /* Fetch one sample from an MSAA texture */
8420       struct tgsi_full_src_register sampleIndex =
8421          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8422       /* LD_MS dst, coord(s0), resource, sampleIndex */
8423       begin_emit_instruction(emit);
8424       emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
8425                          inst->Instruction.Saturate, offsets);
8426       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8427       emit_src_register(emit, &inst->Src[0]);
8428       emit_resource_register(emit, unit);
8429       emit_src_register(emit, &sampleIndex);
8430       end_emit_instruction(emit);
8431    }
8432    else {
8433       /* Fetch one texel specified by integer coordinate */
8434       /* LD dst, coord(s0), resource */
8435       begin_emit_instruction(emit);
8436       emit_sample_opcode(emit, VGPU10_OPCODE_LD,
8437                          inst->Instruction.Saturate, offsets);
8438       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8439       emit_src_register(emit, &inst->Src[0]);
8440       emit_resource_register(emit, unit);
8441       end_emit_instruction(emit);
8442    }
8443 
8444    end_tex_swizzle(emit, &swz_info);
8445 
8446    free_temp_indexes(emit);
8447 
8448    return TRUE;
8449 }
8450 
8451 
8452 /**
8453  * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
8454  * or TGSI_OPCODE_TXB2 (for cube shadow maps).
8455  */
8456 static boolean
emit_txl_txb(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8457 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
8458              const struct tgsi_full_instruction *inst)
8459 {
8460    const enum tgsi_texture_type target = inst->Texture.Texture;
8461    VGPU10_OPCODE_TYPE opcode;
8462    unsigned unit;
8463    int offsets[3];
8464    struct tgsi_full_src_register coord, lod_bias;
8465    struct tex_swizzle_info swz_info;
8466 
8467    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
8468           inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
8469           inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
8470 
8471    if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
8472       lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8473       unit = inst->Src[2].Register.Index;
8474    }
8475    else {
8476       lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8477       unit = inst->Src[1].Register.Index;
8478    }
8479 
8480    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8481                      &swz_info);
8482 
8483    get_texel_offsets(emit, inst, offsets);
8484 
8485    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8486 
8487    /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
8488    begin_emit_instruction(emit);
8489    if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
8490       opcode = VGPU10_OPCODE_SAMPLE_L;
8491    }
8492    else {
8493       opcode = VGPU10_OPCODE_SAMPLE_B;
8494    }
8495    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8496    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8497    emit_src_register(emit, &coord);
8498    emit_resource_register(emit, unit);
8499    emit_sampler_register(emit, unit);
8500    emit_src_register(emit, &lod_bias);
8501    end_emit_instruction(emit);
8502 
8503    end_tex_swizzle(emit, &swz_info);
8504 
8505    free_temp_indexes(emit);
8506 
8507    return TRUE;
8508 }
8509 
8510 
8511 /**
8512  * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
8513  */
8514 static boolean
emit_txl2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8515 emit_txl2(struct svga_shader_emitter_v10 *emit,
8516           const struct tgsi_full_instruction *inst)
8517 {
8518    unsigned target = inst->Texture.Texture;
8519    unsigned opcode, unit;
8520    int offsets[3];
8521    struct tgsi_full_src_register coord, lod;
8522    struct tex_swizzle_info swz_info;
8523 
8524    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
8525 
8526    lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8527    unit = inst->Src[2].Register.Index;
8528 
8529    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8530                      &swz_info);
8531 
8532    get_texel_offsets(emit, inst, offsets);
8533 
8534    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8535 
8536    /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
8537    begin_emit_instruction(emit);
8538    opcode = VGPU10_OPCODE_SAMPLE_L;
8539    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8540    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8541    emit_src_register(emit, &coord);
8542    emit_resource_register(emit, unit);
8543    emit_sampler_register(emit, unit);
8544    emit_src_register(emit, &lod);
8545    end_emit_instruction(emit);
8546 
8547    end_tex_swizzle(emit, &swz_info);
8548 
8549    free_temp_indexes(emit);
8550 
8551    return TRUE;
8552 }
8553 
8554 
8555 /**
8556  * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
8557  */
8558 static boolean
emit_txq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8559 emit_txq(struct svga_shader_emitter_v10 *emit,
8560          const struct tgsi_full_instruction *inst)
8561 {
8562    const uint unit = inst->Src[1].Register.Index;
8563 
8564    if (emit->key.tex[unit].target == PIPE_BUFFER) {
8565       /* RESINFO does not support querying texture buffers, so we instead
8566        * store texture buffer sizes in shader constants, then copy them to
8567        * implement TXQ instead of emitting RESINFO.
8568        * MOV dst, const[texture_buffer_size_index[unit]]
8569        */
8570       struct tgsi_full_src_register size_src =
8571          make_src_const_reg(emit->texture_buffer_size_index[unit]);
8572       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
8573    } else {
8574       /* RESINFO dst, srcMipLevel, resource */
8575       begin_emit_instruction(emit);
8576       emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
8577       emit_dst_register(emit, &inst->Dst[0]);
8578       emit_src_register(emit, &inst->Src[0]);
8579       emit_resource_register(emit, unit);
8580       end_emit_instruction(emit);
8581    }
8582 
8583    free_temp_indexes(emit);
8584 
8585    return TRUE;
8586 }
8587 
8588 
8589 /**
8590  * Does this opcode produce a double-precision result?
8591  * XXX perhaps move this to a TGSI utility.
8592  */
8593 static bool
opcode_has_dbl_dst(unsigned opcode)8594 opcode_has_dbl_dst(unsigned opcode)
8595 {
8596    switch (opcode) {
8597    case TGSI_OPCODE_F2D:
8598    case TGSI_OPCODE_DABS:
8599    case TGSI_OPCODE_DADD:
8600    case TGSI_OPCODE_DFRAC:
8601    case TGSI_OPCODE_DMAX:
8602    case TGSI_OPCODE_DMIN:
8603    case TGSI_OPCODE_DMUL:
8604    case TGSI_OPCODE_DNEG:
8605    case TGSI_OPCODE_I2D:
8606    case TGSI_OPCODE_U2D:
8607       // XXX more TBD
8608       return true;
8609    default:
8610       return false;
8611    }
8612 }
8613 
8614 
8615 /**
8616  * Does this opcode use double-precision source registers?
8617  */
8618 static bool
opcode_has_dbl_src(unsigned opcode)8619 opcode_has_dbl_src(unsigned opcode)
8620 {
8621    switch (opcode) {
8622    case TGSI_OPCODE_D2F:
8623    case TGSI_OPCODE_DABS:
8624    case TGSI_OPCODE_DADD:
8625    case TGSI_OPCODE_DFRAC:
8626    case TGSI_OPCODE_DMAX:
8627    case TGSI_OPCODE_DMIN:
8628    case TGSI_OPCODE_DMUL:
8629    case TGSI_OPCODE_DNEG:
8630    case TGSI_OPCODE_D2I:
8631    case TGSI_OPCODE_D2U:
8632       // XXX more TBD
8633       return true;
8634    default:
8635       return false;
8636    }
8637 }
8638 
8639 
8640 /**
8641  * Check that the swizzle for reading from a double-precision register
8642  * is valid.
8643  */
8644 static void
check_double_src_swizzle(const struct tgsi_full_src_register * reg)8645 check_double_src_swizzle(const struct tgsi_full_src_register *reg)
8646 {
8647    assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
8648            reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
8649           (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
8650            reg->Register.SwizzleY == PIPE_SWIZZLE_W));
8651 
8652    assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
8653            reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
8654           (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
8655            reg->Register.SwizzleW == PIPE_SWIZZLE_W));
8656 }
8657 
8658 
8659 /**
8660  * Check that the writemask for a double-precision instruction is valid.
8661  */
8662 static void
check_double_dst_writemask(const struct tgsi_full_instruction * inst)8663 check_double_dst_writemask(const struct tgsi_full_instruction *inst)
8664 {
8665    ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
8666 
8667    switch (inst->Instruction.Opcode) {
8668    case TGSI_OPCODE_DABS:
8669    case TGSI_OPCODE_DADD:
8670    case TGSI_OPCODE_DFRAC:
8671    case TGSI_OPCODE_DNEG:
8672    case TGSI_OPCODE_DMAD:
8673    case TGSI_OPCODE_DMAX:
8674    case TGSI_OPCODE_DMIN:
8675    case TGSI_OPCODE_DMUL:
8676    case TGSI_OPCODE_DRCP:
8677    case TGSI_OPCODE_DSQRT:
8678    case TGSI_OPCODE_F2D:
8679       assert(writemask == TGSI_WRITEMASK_XYZW ||
8680              writemask == TGSI_WRITEMASK_XY ||
8681              writemask == TGSI_WRITEMASK_ZW);
8682       break;
8683    case TGSI_OPCODE_DSEQ:
8684    case TGSI_OPCODE_DSGE:
8685    case TGSI_OPCODE_DSNE:
8686    case TGSI_OPCODE_DSLT:
8687    case TGSI_OPCODE_D2I:
8688    case TGSI_OPCODE_D2U:
8689       /* Write to 1 or 2 components only */
8690       assert(util_bitcount(writemask) <= 2);
8691       break;
8692    default:
8693       /* XXX this list may be incomplete */
8694       ;
8695    }
8696 }
8697 
8698 
8699 /**
8700  * Double-precision absolute value.
8701  */
8702 static boolean
emit_dabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8703 emit_dabs(struct svga_shader_emitter_v10 *emit,
8704           const struct tgsi_full_instruction *inst)
8705 {
8706    assert(emit->version >= 50);
8707    check_double_src_swizzle(&inst->Src[0]);
8708    check_double_dst_writemask(inst);
8709 
8710    struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]);
8711 
8712    /* DMOV dst, |src| */
8713    emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
8714 
8715    return TRUE;
8716 }
8717 
8718 
8719 /**
8720  * Double-precision negation
8721  */
8722 static boolean
emit_dneg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8723 emit_dneg(struct svga_shader_emitter_v10 *emit,
8724           const struct tgsi_full_instruction *inst)
8725 {
8726    assert(emit->version >= 50);
8727    check_double_src_swizzle(&inst->Src[0]);
8728    check_double_dst_writemask(inst);
8729 
8730    struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
8731 
8732    /* DMOV dst, -src */
8733    emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
8734 
8735    return TRUE;
8736 }
8737 
8738 
8739 /**
8740  * SM5 has no DMAD opcode.  Implement negation with DMUL/DADD.
8741  */
8742 static boolean
emit_dmad(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8743 emit_dmad(struct svga_shader_emitter_v10 *emit,
8744           const struct tgsi_full_instruction *inst)
8745 {
8746    assert(emit->version >= 50);
8747    check_double_src_swizzle(&inst->Src[0]);
8748    check_double_src_swizzle(&inst->Src[1]);
8749    check_double_src_swizzle(&inst->Src[2]);
8750    check_double_dst_writemask(inst);
8751 
8752    unsigned tmp = get_temp_index(emit);
8753    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8754    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8755 
8756    /* DMUL tmp, src[0], src[1] */
8757    emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
8758                         &tmp_dst, &inst->Src[0], &inst->Src[1], NULL,
8759                         FALSE, inst->Instruction.Precise);
8760 
8761    /* DADD dst, tmp, src[2] */
8762    emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
8763                         &inst->Dst[0], &tmp_src, &inst->Src[2], NULL,
8764                         inst->Instruction.Saturate, inst->Instruction.Precise);
8765    free_temp_indexes(emit);
8766 
8767    return TRUE;
8768 }
8769 
8770 
8771 /**
8772  * Double precision reciprocal square root
8773  */
8774 static boolean
emit_drsq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)8775 emit_drsq(struct svga_shader_emitter_v10 *emit,
8776           const struct tgsi_full_dst_register *dst,
8777           const struct tgsi_full_src_register *src)
8778 {
8779    assert(emit->version >= 50);
8780 
8781    VGPU10OpcodeToken0 token0;
8782    begin_emit_instruction(emit);
8783 
8784    token0.value = 0;
8785    token0.opcodeType = VGPU10_OPCODE_VMWARE;
8786    token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
8787    emit_dword(emit, token0.value);
8788 
8789    emit_dst_register(emit, dst);
8790 
8791    check_double_src_swizzle(src);
8792    emit_src_register(emit, src);
8793 
8794    end_emit_instruction(emit);
8795 
8796    return TRUE;
8797 }
8798 
8799 
8800 /**
8801  * There is no SM5 opcode for double precision square root.
8802  * It will be implemented with DRSQ.
8803  * dst = src * DRSQ(src)
8804  */
8805 static boolean
emit_dsqrt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8806 emit_dsqrt(struct svga_shader_emitter_v10 *emit,
8807           const struct tgsi_full_instruction *inst)
8808 {
8809    assert(emit->version >= 50);
8810 
8811    check_double_src_swizzle(&inst->Src[0]);
8812 
8813    /* temporary register to hold the source */
8814    unsigned tmp = get_temp_index(emit);
8815    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8816    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8817 
8818    /* temporary register to hold the DEQ result */
8819    unsigned tmp_cond = get_temp_index(emit);
8820    struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
8821    struct tgsi_full_dst_register tmp_cond_dst_xy =
8822       writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8823    struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
8824    struct tgsi_full_src_register tmp_cond_src_xy =
8825          swizzle_src(&tmp_cond_src,
8826                      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
8827                      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
8828 
8829    /* The reciprocal square root of zero yields INF.
8830     * So if the source is 0, we replace it with 1 in the tmp register.
8831     * The later multiplication of zero in the original source will yield 0
8832     * in the result.
8833     */
8834 
8835    /* tmp1 = (src == 0) ? 1 : src;
8836     *   EQ tmp1, 0, src
8837     *   MOVC tmp, tmp1, 1.0, src
8838     */
8839    struct tgsi_full_src_register zero =
8840                make_immediate_reg_double(emit, 0);
8841 
8842    struct tgsi_full_src_register one =
8843                make_immediate_reg_double(emit, 1.0);
8844 
8845    emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
8846                         &zero, &inst->Src[0]);
8847    emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
8848                         &tmp_cond_src_xy, &one, &inst->Src[0]);
8849 
8850    struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
8851    struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
8852 
8853    /* DRSQ tmp_rsq, tmp */
8854    emit_drsq(emit, &tmp_rsq_dst, &tmp_src);
8855 
8856    /* DMUL dst, tmp_rsq, src[0] */
8857    emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
8858                         &tmp_rsq_src, &inst->Src[0]);
8859 
8860    free_temp_indexes(emit);
8861 
8862    return TRUE;
8863 }
8864 
8865 
8866 static boolean
emit_interp_offset(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8867 emit_interp_offset(struct svga_shader_emitter_v10 *emit,
8868                    const struct tgsi_full_instruction *inst)
8869 {
8870    assert(emit->version >= 50);
8871 
8872    /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
8873     * where (0,0) is the center of the pixel.  We need to translate that
8874     * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
8875     * Also need to flip the Y axis (I think).
8876     */
8877    unsigned tmp = get_temp_index(emit);
8878    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8879    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8880    struct tgsi_full_dst_register tmp_dst_xy =
8881       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8882    struct tgsi_full_src_register const16 =
8883       make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);
8884 
8885    /* MUL tmp.xy, src1, {16, -16, 0, 0} */
8886    emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
8887                         &tmp_dst_xy, &inst->Src[1], &const16);
8888 
8889    /* FTOI tmp.xy, tmp */
8890    emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);
8891 
8892    /* EVAL_SNAPPED dst, src0, tmp */
8893    emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
8894                         &inst->Dst[0], &inst->Src[0], &tmp_src);
8895 
8896    free_temp_indexes(emit);
8897 
8898    return TRUE;
8899 }
8900 
8901 
8902 /**
8903  * Emit a simple instruction (like ADD, MUL, MIN, etc).
8904  */
8905 static boolean
emit_simple(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8906 emit_simple(struct svga_shader_emitter_v10 *emit,
8907             const struct tgsi_full_instruction *inst)
8908 {
8909    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
8910    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
8911    const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
8912    const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
8913    unsigned i;
8914 
8915    if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
8916       emit->current_loop_depth++;
8917    }
8918    else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
8919       emit->current_loop_depth--;
8920    }
8921 
8922    begin_emit_instruction(emit);
8923    emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
8924                        inst->Instruction.Saturate,
8925                        inst->Instruction.Precise);
8926    for (i = 0; i < op->num_dst; i++) {
8927       if (dbl_dst) {
8928          check_double_dst_writemask(inst);
8929       }
8930       emit_dst_register(emit, &inst->Dst[i]);
8931    }
8932    for (i = 0; i < op->num_src; i++) {
8933       if (dbl_src) {
8934          check_double_src_swizzle(&inst->Src[i]);
8935       }
8936       emit_src_register(emit, &inst->Src[i]);
8937    }
8938    end_emit_instruction(emit);
8939 
8940    return TRUE;
8941 }
8942 
8943 
8944 /**
8945  * Emit MSB instruction (like IMSB, UMSB).
8946  *
8947  * GLSL returns the index starting from the LSB;
8948  * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB.
8949  * To get correct location as per glsl from SM5 device, we should
8950  * return (31 - index) if returned index is not -1.
8951  */
8952 static boolean
emit_msb(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8953 emit_msb(struct svga_shader_emitter_v10 *emit,
8954          const struct tgsi_full_instruction *inst)
8955 {
8956    const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];
8957 
8958    assert(index_dst->Register.File != TGSI_FILE_OUTPUT);
8959 
8960    struct tgsi_full_src_register index_src =
8961       make_src_reg(index_dst->Register.File, index_dst->Register.Index);
8962    struct tgsi_full_src_register imm31 =
8963       make_immediate_reg_int(emit, 31);
8964    imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
8965    struct tgsi_full_src_register neg_one =
8966       make_immediate_reg_int(emit, -1);
8967    neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
8968    unsigned tmp = get_temp_index(emit);
8969    const struct tgsi_full_dst_register tmp_dst =
8970       make_dst_temp_reg(tmp);
8971    const struct tgsi_full_dst_register tmp_dst_x =
8972       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
8973    const struct tgsi_full_src_register tmp_src_x =
8974        make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
8975    int writemask = TGSI_WRITEMASK_X;
8976    int src_swizzle = TGSI_SWIZZLE_X;
8977    int dst_writemask = index_dst->Register.WriteMask;
8978 
8979    emit_simple(emit, inst);
8980 
8981    /* index conversion from SM5 to GLSL */
8982    while (writemask & dst_writemask) {
8983       struct tgsi_full_src_register index_src_comp =
8984          scalar_src(&index_src, src_swizzle);
8985       struct tgsi_full_dst_register index_dst_comp =
8986          writemask_dst(index_dst, writemask);
8987 
8988       /* check if index_src_comp != -1 */
8989       emit_instruction_op2(emit, VGPU10_OPCODE_INE,
8990                            &tmp_dst_x, &index_src_comp, &neg_one);
8991 
8992       /* if */
8993       emit_if(emit, &tmp_src_x);
8994 
8995       index_src_comp = negate_src(&index_src_comp);
8996       /* SUB DST, IMM{31}, DST */
8997       emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
8998                            &index_dst_comp, &imm31, &index_src_comp);
8999 
9000       /* endif */
9001       emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9002 
9003       writemask = writemask << 1;
9004       src_swizzle = src_swizzle + 1;
9005    }
9006    free_temp_indexes(emit);
9007    return TRUE;
9008 }
9009 
9010 
9011 /**
9012  * Emit a BFE instruction (like UBFE, IBFE).
9013  * tgsi representation:
9014  * U/IBFE dst, value, offset, width
9015  * SM5 representation:
9016  * U/IBFE dst, width, offset, value
9017  * Note: SM5 has width & offset range (0-31);
9018  *      whereas GLSL has width & offset range (0-32)
9019  */
9020 static boolean
emit_bfe(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9021 emit_bfe(struct svga_shader_emitter_v10 *emit,
9022          const struct tgsi_full_instruction *inst)
9023 {
9024    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9025    struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9026    imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9027    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9028    zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9029 
9030    unsigned tmp1 = get_temp_index(emit);
9031    const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9032    const struct tgsi_full_dst_register cond1_dst_x =
9033       writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9034    const struct tgsi_full_src_register cond1_src_x =
9035       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9036 
9037    unsigned tmp2 = get_temp_index(emit);
9038    const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9039    const struct tgsi_full_dst_register cond2_dst_x =
9040       writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9041    const struct tgsi_full_src_register cond2_src_x =
9042       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9043 
9044    /**
9045     * In SM5, when width = 32  and offset = 0, it returns 0.
9046     * On the other hand GLSL, expects value to be copied as it is, to dst.
9047     */
9048 
9049    /* cond1 = width ! = 32 */
9050    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9051                         &cond1_dst_x, &inst->Src[2], &imm32);
9052 
9053    /* cond2 = offset ! = 0 */
9054    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9055                         &cond2_dst_x, &inst->Src[1], &zero);
9056 
9057    /* cond 2 = cond1 & cond 2 */
9058    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
9059                         &cond2_src_x,
9060                         &cond1_src_x);
9061    /* IF */
9062    emit_if(emit, &cond2_src_x);
9063 
9064    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9065                         &inst->Src[0]);
9066 
9067    /* ELSE */
9068    emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9069 
9070    /* U/IBFE dst, width, offset, value */
9071    emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
9072                         &inst->Src[2], &inst->Src[1], &inst->Src[0]);
9073 
9074    /* ENDIF */
9075    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9076 
9077    free_temp_indexes(emit);
9078    return TRUE;
9079 }
9080 
9081 
9082 /**
9083  * Emit BFI  instruction
9084  * tgsi representation:
9085  * BFI dst, base, insert, offset, width
9086  * SM5 representation:
9087  * BFI dst, width, offset, insert, base
9088  * Note: SM5 has width & offset range (0-31);
9089  *      whereas GLSL has width & offset range (0-32)
9090  */
9091 static boolean
emit_bfi(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9092 emit_bfi(struct svga_shader_emitter_v10 *emit,
9093          const struct tgsi_full_instruction *inst)
9094 {
9095    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9096    struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9097    imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9098 
9099    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9100    zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9101 
9102    unsigned tmp1 = get_temp_index(emit);
9103    const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9104    const struct tgsi_full_dst_register cond1_dst_x =
9105       writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9106    const struct tgsi_full_src_register cond1_src_x =
9107       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9108 
9109    unsigned tmp2 = get_temp_index(emit);
9110    const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9111    const struct tgsi_full_dst_register cond2_dst_x =
9112       writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9113    const struct tgsi_full_src_register cond2_src_x =
9114       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9115 
9116    /**
9117     * In SM5, when width = 32  and offset = 0, it returns 0.
9118     * On the other hand GLSL, expects insert to be copied as it is, to dst.
9119     */
9120 
9121    /* cond1 = width == 32 */
9122    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9123                         &cond1_dst_x, &inst->Src[3], &imm32);
9124 
9125    /* cond1 = offset == 0 */
9126    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9127                         &cond2_dst_x, &inst->Src[2], &zero);
9128 
9129    /* cond2 = cond1 & cond2 */
9130    emit_instruction_op2(emit, VGPU10_OPCODE_AND,
9131                         &cond2_dst_x, &cond2_src_x, &cond1_src_x);
9132 
9133    /* if */
9134    emit_if(emit, &cond2_src_x);
9135 
9136    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9137                         &inst->Src[1]);
9138 
9139    /* else */
9140    emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9141 
9142    /* BFI dst, width, offset, insert, base */
9143    begin_emit_instruction(emit);
9144    emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9145    emit_dst_register(emit, &inst->Dst[0]);
9146    emit_src_register(emit, &inst->Src[3]);
9147    emit_src_register(emit, &inst->Src[2]);
9148    emit_src_register(emit, &inst->Src[1]);
9149    emit_src_register(emit, &inst->Src[0]);
9150    end_emit_instruction(emit);
9151 
9152    /* endif */
9153    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9154 
9155    free_temp_indexes(emit);
9156    return TRUE;
9157 }
9158 
9159 
9160 /**
9161  * We only special case the MOV instruction to try to detect constant
9162  * color writes in the fragment shader.
9163  */
9164 static boolean
emit_mov(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9165 emit_mov(struct svga_shader_emitter_v10 *emit,
9166          const struct tgsi_full_instruction *inst)
9167 {
9168    const struct tgsi_full_src_register *src = &inst->Src[0];
9169    const struct tgsi_full_dst_register *dst = &inst->Dst[0];
9170 
9171    if (emit->unit == PIPE_SHADER_FRAGMENT &&
9172        dst->Register.File == TGSI_FILE_OUTPUT &&
9173        dst->Register.Index == 0 &&
9174        src->Register.File == TGSI_FILE_CONSTANT &&
9175        !src->Register.Indirect) {
9176       emit->constant_color_output = TRUE;
9177    }
9178 
9179    return emit_simple(emit, inst);
9180 }
9181 
9182 
9183 /**
9184  * Emit a simple VGPU10 instruction which writes to multiple dest registers,
9185  * where TGSI only uses one dest register.
9186  */
9187 static boolean
emit_simple_1dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned dst_count,unsigned dst_index)9188 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
9189                  const struct tgsi_full_instruction *inst,
9190                  unsigned dst_count,
9191                  unsigned dst_index)
9192 {
9193    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9194    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9195    unsigned i;
9196 
9197    begin_emit_instruction(emit);
9198    emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9199 
9200    for (i = 0; i < dst_count; i++) {
9201       if (i == dst_index) {
9202          emit_dst_register(emit, &inst->Dst[0]);
9203       } else {
9204          emit_null_dst_register(emit);
9205       }
9206    }
9207 
9208    for (i = 0; i < op->num_src; i++) {
9209       emit_src_register(emit, &inst->Src[i]);
9210    }
9211    end_emit_instruction(emit);
9212 
9213    return TRUE;
9214 }
9215 
9216 
9217 /**
9218  * Emit a vmware specific VGPU10 instruction.
9219  */
9220 static boolean
emit_vmware(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,VGPU10_VMWARE_OPCODE_TYPE subopcode)9221 emit_vmware(struct svga_shader_emitter_v10 *emit,
9222             const struct tgsi_full_instruction *inst,
9223             VGPU10_VMWARE_OPCODE_TYPE subopcode)
9224 {
9225    VGPU10OpcodeToken0 token0;
9226    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9227    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9228    const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9229    const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9230 
9231    unsigned i;
9232 
9233    begin_emit_instruction(emit);
9234 
9235    assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);
9236 
9237    token0.value = 0;
9238    token0.opcodeType = VGPU10_OPCODE_VMWARE;
9239    token0.vmwareOpcodeType = subopcode;
9240    emit_dword(emit, token0.value);
9241 
9242    if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
9243       /* IDIV only uses the first dest register. */
9244       emit_dst_register(emit, &inst->Dst[0]);
9245       emit_null_dst_register(emit);
9246    } else {
9247       for (i = 0; i < op->num_dst; i++) {
9248          if (dbl_dst) {
9249             check_double_dst_writemask(inst);
9250          }
9251          emit_dst_register(emit, &inst->Dst[i]);
9252       }
9253    }
9254 
9255    for (i = 0; i < op->num_src; i++) {
9256       if (dbl_src) {
9257          check_double_src_swizzle(&inst->Src[i]);
9258       }
9259       emit_src_register(emit, &inst->Src[i]);
9260    }
9261    end_emit_instruction(emit);
9262 
9263    return TRUE;
9264 }
9265 
9266 
9267 /**
9268  * Translate a single TGSI instruction to VGPU10.
9269  */
9270 static boolean
emit_vgpu10_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)9271 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
9272                         unsigned inst_number,
9273                         const struct tgsi_full_instruction *inst)
9274 {
9275    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9276 
9277    if (emit->skip_instruction)
9278       return TRUE;
9279 
9280    switch (opcode) {
9281    case TGSI_OPCODE_ADD:
9282    case TGSI_OPCODE_AND:
9283    case TGSI_OPCODE_BGNLOOP:
9284    case TGSI_OPCODE_BRK:
9285    case TGSI_OPCODE_CEIL:
9286    case TGSI_OPCODE_CONT:
9287    case TGSI_OPCODE_DDX:
9288    case TGSI_OPCODE_DDY:
9289    case TGSI_OPCODE_DIV:
9290    case TGSI_OPCODE_DP2:
9291    case TGSI_OPCODE_DP3:
9292    case TGSI_OPCODE_DP4:
9293    case TGSI_OPCODE_ELSE:
9294    case TGSI_OPCODE_ENDIF:
9295    case TGSI_OPCODE_ENDLOOP:
9296    case TGSI_OPCODE_ENDSUB:
9297    case TGSI_OPCODE_F2I:
9298    case TGSI_OPCODE_F2U:
9299    case TGSI_OPCODE_FLR:
9300    case TGSI_OPCODE_FRC:
9301    case TGSI_OPCODE_FSEQ:
9302    case TGSI_OPCODE_FSGE:
9303    case TGSI_OPCODE_FSLT:
9304    case TGSI_OPCODE_FSNE:
9305    case TGSI_OPCODE_I2F:
9306    case TGSI_OPCODE_IMAX:
9307    case TGSI_OPCODE_IMIN:
9308    case TGSI_OPCODE_INEG:
9309    case TGSI_OPCODE_ISGE:
9310    case TGSI_OPCODE_ISHR:
9311    case TGSI_OPCODE_ISLT:
9312    case TGSI_OPCODE_MAD:
9313    case TGSI_OPCODE_MAX:
9314    case TGSI_OPCODE_MIN:
9315    case TGSI_OPCODE_MUL:
9316    case TGSI_OPCODE_NOP:
9317    case TGSI_OPCODE_NOT:
9318    case TGSI_OPCODE_OR:
9319    case TGSI_OPCODE_UADD:
9320    case TGSI_OPCODE_USEQ:
9321    case TGSI_OPCODE_USGE:
9322    case TGSI_OPCODE_USLT:
9323    case TGSI_OPCODE_UMIN:
9324    case TGSI_OPCODE_UMAD:
9325    case TGSI_OPCODE_UMAX:
9326    case TGSI_OPCODE_ROUND:
9327    case TGSI_OPCODE_SQRT:
9328    case TGSI_OPCODE_SHL:
9329    case TGSI_OPCODE_TRUNC:
9330    case TGSI_OPCODE_U2F:
9331    case TGSI_OPCODE_UCMP:
9332    case TGSI_OPCODE_USHR:
9333    case TGSI_OPCODE_USNE:
9334    case TGSI_OPCODE_XOR:
9335    /* Begin SM5 opcodes */
9336    case TGSI_OPCODE_F2D:
9337    case TGSI_OPCODE_D2F:
9338    case TGSI_OPCODE_DADD:
9339    case TGSI_OPCODE_DMUL:
9340    case TGSI_OPCODE_DMAX:
9341    case TGSI_OPCODE_DMIN:
9342    case TGSI_OPCODE_DSGE:
9343    case TGSI_OPCODE_DSLT:
9344    case TGSI_OPCODE_DSEQ:
9345    case TGSI_OPCODE_DSNE:
9346    case TGSI_OPCODE_BREV:
9347    case TGSI_OPCODE_POPC:
9348    case TGSI_OPCODE_LSB:
9349    case TGSI_OPCODE_INTERP_CENTROID:
9350    case TGSI_OPCODE_INTERP_SAMPLE:
9351       /* simple instructions */
9352       return emit_simple(emit, inst);
9353    case TGSI_OPCODE_RET:
9354       if (emit->unit == PIPE_SHADER_TESS_CTRL &&
9355           !emit->tcs.control_point_phase) {
9356 
9357          /* store the tessellation levels in the patch constant phase only */
9358          store_tesslevels(emit);
9359       }
9360       return emit_simple(emit, inst);
9361 
9362    case TGSI_OPCODE_IMSB:
9363    case TGSI_OPCODE_UMSB:
9364       return emit_msb(emit, inst);
9365    case TGSI_OPCODE_IBFE:
9366    case TGSI_OPCODE_UBFE:
9367       return emit_bfe(emit, inst);
9368    case TGSI_OPCODE_BFI:
9369       return emit_bfi(emit, inst);
9370    case TGSI_OPCODE_MOV:
9371       return emit_mov(emit, inst);
9372    case TGSI_OPCODE_EMIT:
9373       return emit_vertex(emit, inst);
9374    case TGSI_OPCODE_ENDPRIM:
9375       return emit_endprim(emit, inst);
9376    case TGSI_OPCODE_IABS:
9377       return emit_iabs(emit, inst);
9378    case TGSI_OPCODE_ARL:
9379       FALLTHROUGH;
9380    case TGSI_OPCODE_UARL:
9381       return emit_arl_uarl(emit, inst);
9382    case TGSI_OPCODE_BGNSUB:
9383       /* no-op */
9384       return TRUE;
9385    case TGSI_OPCODE_CAL:
9386       return emit_cal(emit, inst);
9387    case TGSI_OPCODE_CMP:
9388       return emit_cmp(emit, inst);
9389    case TGSI_OPCODE_COS:
9390       return emit_sincos(emit, inst);
9391    case TGSI_OPCODE_DST:
9392       return emit_dst(emit, inst);
9393    case TGSI_OPCODE_EX2:
9394       return emit_ex2(emit, inst);
9395    case TGSI_OPCODE_EXP:
9396       return emit_exp(emit, inst);
9397    case TGSI_OPCODE_IF:
9398       return emit_if(emit, &inst->Src[0]);
9399    case TGSI_OPCODE_KILL:
9400       return emit_kill(emit, inst);
9401    case TGSI_OPCODE_KILL_IF:
9402       return emit_kill_if(emit, inst);
9403    case TGSI_OPCODE_LG2:
9404       return emit_lg2(emit, inst);
9405    case TGSI_OPCODE_LIT:
9406       return emit_lit(emit, inst);
9407    case TGSI_OPCODE_LODQ:
9408       return emit_lodq(emit, inst);
9409    case TGSI_OPCODE_LOG:
9410       return emit_log(emit, inst);
9411    case TGSI_OPCODE_LRP:
9412       return emit_lrp(emit, inst);
9413    case TGSI_OPCODE_POW:
9414       return emit_pow(emit, inst);
9415    case TGSI_OPCODE_RCP:
9416       return emit_rcp(emit, inst);
9417    case TGSI_OPCODE_RSQ:
9418       return emit_rsq(emit, inst);
9419    case TGSI_OPCODE_SAMPLE:
9420       return emit_sample(emit, inst);
9421    case TGSI_OPCODE_SEQ:
9422       return emit_seq(emit, inst);
9423    case TGSI_OPCODE_SGE:
9424       return emit_sge(emit, inst);
9425    case TGSI_OPCODE_SGT:
9426       return emit_sgt(emit, inst);
9427    case TGSI_OPCODE_SIN:
9428       return emit_sincos(emit, inst);
9429    case TGSI_OPCODE_SLE:
9430       return emit_sle(emit, inst);
9431    case TGSI_OPCODE_SLT:
9432       return emit_slt(emit, inst);
9433    case TGSI_OPCODE_SNE:
9434       return emit_sne(emit, inst);
9435    case TGSI_OPCODE_SSG:
9436       return emit_ssg(emit, inst);
9437    case TGSI_OPCODE_ISSG:
9438       return emit_issg(emit, inst);
9439    case TGSI_OPCODE_TEX:
9440       return emit_tex(emit, inst);
9441    case TGSI_OPCODE_TG4:
9442       return emit_tg4(emit, inst);
9443    case TGSI_OPCODE_TEX2:
9444       return emit_tex2(emit, inst);
9445    case TGSI_OPCODE_TXP:
9446       return emit_txp(emit, inst);
9447    case TGSI_OPCODE_TXB:
9448    case TGSI_OPCODE_TXB2:
9449    case TGSI_OPCODE_TXL:
9450       return emit_txl_txb(emit, inst);
9451    case TGSI_OPCODE_TXD:
9452       return emit_txd(emit, inst);
9453    case TGSI_OPCODE_TXF:
9454       return emit_txf(emit, inst);
9455    case TGSI_OPCODE_TXL2:
9456       return emit_txl2(emit, inst);
9457    case TGSI_OPCODE_TXQ:
9458       return emit_txq(emit, inst);
9459    case TGSI_OPCODE_UIF:
9460       return emit_if(emit, &inst->Src[0]);
9461    case TGSI_OPCODE_UMUL_HI:
9462    case TGSI_OPCODE_IMUL_HI:
9463    case TGSI_OPCODE_UDIV:
9464       /* These cases use only the FIRST of two destination registers */
9465       return emit_simple_1dst(emit, inst, 2, 0);
9466    case TGSI_OPCODE_IDIV:
9467       return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
9468    case TGSI_OPCODE_UMUL:
9469    case TGSI_OPCODE_UMOD:
9470    case TGSI_OPCODE_MOD:
9471       /* These cases use only the SECOND of two destination registers */
9472       return emit_simple_1dst(emit, inst, 2, 1);
9473 
9474    /* Begin SM5 opcodes */
9475    case TGSI_OPCODE_DABS:
9476       return emit_dabs(emit, inst);
9477    case TGSI_OPCODE_DNEG:
9478       return emit_dneg(emit, inst);
9479    case TGSI_OPCODE_DRCP:
9480       return emit_simple(emit, inst);
9481    case TGSI_OPCODE_DSQRT:
9482       return emit_dsqrt(emit, inst);
9483    case TGSI_OPCODE_DMAD:
9484       return emit_dmad(emit, inst);
9485    case TGSI_OPCODE_DFRAC:
9486       return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
9487    case TGSI_OPCODE_D2I:
9488    case TGSI_OPCODE_D2U:
9489       return emit_simple(emit, inst);
9490    case TGSI_OPCODE_I2D:
9491    case TGSI_OPCODE_U2D:
9492       return emit_simple(emit, inst);
9493    case TGSI_OPCODE_DRSQ:
9494       return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
9495    case TGSI_OPCODE_DDIV:
9496       return emit_simple(emit, inst);
9497    case TGSI_OPCODE_INTERP_OFFSET:
9498       return emit_interp_offset(emit, inst);
9499 
9500    /* The following opcodes should never be seen here.  We return zero
9501     * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
9502     * FMA_SUPPORTED, LDEXP_SUPPORTED queries.
9503     */
9504    case TGSI_OPCODE_FMA:
9505    case TGSI_OPCODE_LDEXP:
9506    case TGSI_OPCODE_DSSG:
9507    case TGSI_OPCODE_DFRACEXP:
9508    case TGSI_OPCODE_DLDEXP:
9509    case TGSI_OPCODE_DTRUNC:
9510    case TGSI_OPCODE_DCEIL:
9511    case TGSI_OPCODE_DFLR:
9512       debug_printf("Unexpected TGSI opcode %s.  "
9513                    "Should have been translated away by the GLSL compiler.\n",
9514                    tgsi_get_opcode_name(opcode));
9515       return FALSE;
9516 
9517    case TGSI_OPCODE_LOAD:
9518    case TGSI_OPCODE_STORE:
9519    case TGSI_OPCODE_ATOMAND:
9520    case TGSI_OPCODE_ATOMCAS:
9521    case TGSI_OPCODE_ATOMIMAX:
9522    case TGSI_OPCODE_ATOMIMIN:
9523    case TGSI_OPCODE_ATOMOR:
9524    case TGSI_OPCODE_ATOMUADD:
9525    case TGSI_OPCODE_ATOMUMAX:
9526    case TGSI_OPCODE_ATOMUMIN:
9527    case TGSI_OPCODE_ATOMXCHG:
9528    case TGSI_OPCODE_ATOMXOR:
9529       return FALSE;
9530    case TGSI_OPCODE_BARRIER:
9531       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
9532          /* SM5 device doesn't support BARRIER in tcs . If barrier is used
9533           * in shader, don't do anything for this opcode and continue rest
9534           * of shader translation
9535           */
9536          pipe_debug_message(&emit->svga_debug_callback, INFO,
9537                             "barrier instruction is not supported in tessellation control shader\n");
9538          return TRUE;
9539       }
9540       else {
9541          return emit_simple(emit, inst);
9542       }
9543 
9544    case TGSI_OPCODE_END:
9545       if (!emit_post_helpers(emit))
9546          return FALSE;
9547       return emit_simple(emit, inst);
9548 
9549    default:
9550       debug_printf("Unimplemented tgsi instruction %s\n",
9551                    tgsi_get_opcode_name(opcode));
9552       return FALSE;
9553    }
9554 
9555    return TRUE;
9556 }
9557 
9558 
9559 /**
9560  * Emit the extra instructions to adjust the vertex position.
9561  * There are two possible adjustments:
9562  * 1. Converting from Gallium to VGPU10 coordinate space by applying the
9563  *    "prescale" and "pretranslate" values.
9564  * 2. Undoing the viewport transformation when we use the swtnl/draw path.
9565  * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
9566  */
9567 static void
emit_vpos_instructions(struct svga_shader_emitter_v10 * emit)9568 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
9569 {
9570    struct tgsi_full_src_register tmp_pos_src;
9571    struct tgsi_full_dst_register pos_dst;
9572    const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
9573 
9574    /* Don't bother to emit any extra vertex instructions if vertex position is
9575     * not written out
9576     */
9577    if (emit->vposition.out_index == INVALID_INDEX)
9578       return;
9579 
9580    /**
9581     * Reset the temporary vertex position register index
9582     * so that emit_dst_register() will use the real vertex position output
9583     */
9584    emit->vposition.tmp_index = INVALID_INDEX;
9585 
9586    tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
9587    pos_dst = make_dst_output_reg(emit->vposition.out_index);
9588 
9589    /* If non-adjusted vertex position register index
9590     * is valid, copy the vertex position from the temporary
9591     * vertex position register before it is modified by the
9592     * prescale computation.
9593     */
9594    if (emit->vposition.so_index != INVALID_INDEX) {
9595       struct tgsi_full_dst_register pos_so_dst =
9596          make_dst_output_reg(emit->vposition.so_index);
9597 
9598       /* MOV pos_so, tmp_pos */
9599       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
9600    }
9601 
9602    if (emit->vposition.need_prescale) {
9603       /* This code adjusts the vertex position to match the VGPU10 convention.
9604        * If p is the position computed by the shader (usually by applying the
9605        * modelview and projection matrices), the new position q is computed by:
9606        *
9607        * q.x = p.w * trans.x + p.x * scale.x
9608        * q.y = p.w * trans.y + p.y * scale.y
9609        * q.z = p.w * trans.z + p.z * scale.z;
9610        * q.w = p.w * trans.w + p.w;
9611        */
9612       struct tgsi_full_src_register tmp_pos_src_w =
9613          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9614       struct tgsi_full_dst_register tmp_pos_dst =
9615          make_dst_temp_reg(vs_pos_tmp_index);
9616       struct tgsi_full_dst_register tmp_pos_dst_xyz =
9617          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
9618 
9619       struct tgsi_full_src_register prescale_scale =
9620          make_src_temp_reg(emit->vposition.prescale_scale_index);
9621       struct tgsi_full_src_register prescale_trans =
9622          make_src_temp_reg(emit->vposition.prescale_trans_index);
9623 
9624       /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
9625       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
9626                            &tmp_pos_src, &prescale_scale);
9627 
9628       /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
9629       emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
9630                            &prescale_trans, &tmp_pos_src);
9631    }
9632    else if (emit->key.vs.undo_viewport) {
9633       /* This code computes the final vertex position from the temporary
9634        * vertex position by undoing the viewport transformation and the
9635        * divide-by-W operation (we convert window coords back to clip coords).
9636        * This is needed when we use the 'draw' module for fallbacks.
9637        * If p is the temp pos in window coords, then the NDC coord q is:
9638        *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
9639        *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
9640        *   q.z = p.z * p.w
9641        *   q.w = p.w
9642        * CONST[vs_viewport_index] contains:
9643        *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
9644        */
9645       struct tgsi_full_dst_register tmp_pos_dst =
9646          make_dst_temp_reg(vs_pos_tmp_index);
9647       struct tgsi_full_dst_register tmp_pos_dst_xy =
9648          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
9649       struct tgsi_full_src_register tmp_pos_src_wwww =
9650          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9651 
9652       struct tgsi_full_dst_register pos_dst_xyz =
9653          writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
9654       struct tgsi_full_dst_register pos_dst_w =
9655          writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
9656 
9657       struct tgsi_full_src_register vp_xyzw =
9658          make_src_const_reg(emit->vs.viewport_index);
9659       struct tgsi_full_src_register vp_zwww =
9660          swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
9661                      TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
9662 
9663       /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
9664       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
9665                            &tmp_pos_src, &vp_zwww);
9666 
9667       /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
9668       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
9669                            &tmp_pos_src, &vp_xyzw);
9670 
9671       /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
9672       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
9673                            &tmp_pos_src, &tmp_pos_src_wwww);
9674 
9675       /* MOV pos.w, tmp_pos.w */
9676       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
9677    }
9678    else if (vs_pos_tmp_index != INVALID_INDEX) {
9679       /* This code is to handle the case where the temporary vertex
9680        * position register is created when the vertex shader has stream
9681        * output and prescale is disabled because rasterization is to be
9682        * discarded.
9683        */
9684       struct tgsi_full_dst_register pos_dst =
9685          make_dst_output_reg(emit->vposition.out_index);
9686 
9687       /* MOV pos, tmp_pos */
9688       begin_emit_instruction(emit);
9689       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9690       emit_dst_register(emit, &pos_dst);
9691       emit_src_register(emit, &tmp_pos_src);
9692       end_emit_instruction(emit);
9693    }
9694 
9695    /* Restore original vposition.tmp_index value for the next GS vertex.
9696     * It doesn't matter for VS.
9697     */
9698    emit->vposition.tmp_index = vs_pos_tmp_index;
9699 }
9700 
9701 static void
emit_clipping_instructions(struct svga_shader_emitter_v10 * emit)9702 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
9703 {
9704    if (emit->clip_mode == CLIP_DISTANCE) {
9705       /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
9706       emit_clip_distance_instructions(emit);
9707 
9708    } else if (emit->clip_mode == CLIP_VERTEX &&
9709               emit->key.last_vertex_stage) {
9710       /* Convert TGSI CLIPVERTEX to CLIPDIST */
9711       emit_clip_vertex_instructions(emit);
9712    }
9713 
9714    /**
9715     * Emit vertex position and take care of legacy user planes only if
9716     * there is a valid vertex position register index.
9717     * This is to take care of the case
9718     * where the shader doesn't output vertex position. Then in
9719     * this case, don't bother to emit more vertex instructions.
9720     */
9721    if (emit->vposition.out_index == INVALID_INDEX)
9722       return;
9723 
9724    /**
9725     * Emit per-vertex clipping instructions for legacy user defined clip planes.
9726     * NOTE: we must emit the clip distance instructions before the
9727     * emit_vpos_instructions() call since the later function will change
9728     * the TEMP[vs_pos_tmp_index] value.
9729     */
9730    if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
9731       /* Emit CLIPDIST for legacy user defined clip planes */
9732       emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
9733    }
9734 }
9735 
9736 
/**
 * Emit the additional instructions needed for every vertex: clip distance
 * computation followed by the clip-coordinate space conversion of the
 * position.  Called for each GS emit-vertex instruction and at the end of
 * VS translation.
 */
static void
emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* Clipping goes first; the position fix-up runs afterwards. */
   emit_clipping_instructions(emit);
   emit_vpos_instructions(emit);
}
9751 
9752 
9753 /**
9754  * Translate the TGSI_OPCODE_EMIT GS instruction.
9755  */
9756 static boolean
emit_vertex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9757 emit_vertex(struct svga_shader_emitter_v10 *emit,
9758             const struct tgsi_full_instruction *inst)
9759 {
9760    unsigned ret = TRUE;
9761 
9762    assert(emit->unit == PIPE_SHADER_GEOMETRY);
9763 
9764    /**
9765     * Emit the viewport array index for the first vertex.
9766     */
9767    if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
9768       struct tgsi_full_dst_register viewport_index_out =
9769          make_dst_output_reg(emit->gs.viewport_index_out_index);
9770       struct tgsi_full_dst_register viewport_index_out_x =
9771          writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
9772       struct tgsi_full_src_register viewport_index_tmp =
9773          make_src_temp_reg(emit->gs.viewport_index_tmp_index);
9774 
9775       /* Set the out index to INVALID_INDEX, so it will not
9776        * be assigned to a temp again in emit_dst_register, and
9777        * the viewport index will not be assigned again in the
9778        * subsequent vertices.
9779        */
9780       emit->gs.viewport_index_out_index = INVALID_INDEX;
9781       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9782                            &viewport_index_out_x, &viewport_index_tmp);
9783    }
9784 
9785    /**
9786     * Find the stream index associated with this emit vertex instruction.
9787     */
9788    assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
9789    unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
9790 
9791    /**
9792     * According to the ARB_gpu_shader5 spec, the built-in geometry shader
9793     * outputs are always associated with vertex stream zero.
9794     * So emit the extra vertex instructions for position or clip distance
9795     * for stream zero only.
9796     */
9797    if (streamIndex == 0) {
9798       /**
9799        * Before emitting vertex instructions, emit the temporaries for
9800        * the prescale constants based on the viewport index if needed.
9801        */
9802       if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
9803          emit_temp_prescale_instructions(emit);
9804 
9805       emit_vertex_instructions(emit);
9806    }
9807 
9808    begin_emit_instruction(emit);
9809    if (emit->version >= 50) {
9810       if (emit->info.num_stream_output_components[streamIndex] == 0) {
9811          /**
9812           * If there is no output for this stream, discard this instruction.
9813           */
9814          emit->discard_instruction = TRUE;
9815       }
9816       else {
9817          emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE);
9818          emit_stream_register(emit, streamIndex);
9819       }
9820    }
9821    else {
9822       emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
9823    }
9824    end_emit_instruction(emit);
9825 
9826    return ret;
9827 }
9828 
9829 
9830 /**
9831  * Emit the extra code to convert from VGPU10's boolean front-face
9832  * register to TGSI's signed front-face register.
9833  *
9834  * TODO: Make temporary front-face register a scalar.
9835  */
9836 static void
emit_frontface_instructions(struct svga_shader_emitter_v10 * emit)9837 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
9838 {
9839    assert(emit->unit == PIPE_SHADER_FRAGMENT);
9840 
9841    if (emit->fs.face_input_index != INVALID_INDEX) {
9842       /* convert vgpu10 boolean face register to gallium +/-1 value */
9843       struct tgsi_full_dst_register tmp_dst =
9844          make_dst_temp_reg(emit->fs.face_tmp_index);
9845       struct tgsi_full_src_register one =
9846          make_immediate_reg_float(emit, 1.0f);
9847       struct tgsi_full_src_register neg_one =
9848          make_immediate_reg_float(emit, -1.0f);
9849 
9850       /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
9851       begin_emit_instruction(emit);
9852       emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
9853       emit_dst_register(emit, &tmp_dst);
9854       emit_face_register(emit);
9855       emit_src_register(emit, &one);
9856       emit_src_register(emit, &neg_one);
9857       end_emit_instruction(emit);
9858    }
9859 }
9860 
9861 
9862 /**
9863  * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
9864  */
9865 static void
emit_fragcoord_instructions(struct svga_shader_emitter_v10 * emit)9866 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
9867 {
9868    assert(emit->unit == PIPE_SHADER_FRAGMENT);
9869 
9870    if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
9871       struct tgsi_full_dst_register tmp_dst =
9872          make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
9873       struct tgsi_full_dst_register tmp_dst_xyz =
9874          writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
9875       struct tgsi_full_dst_register tmp_dst_w =
9876          writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
9877       struct tgsi_full_src_register one =
9878          make_immediate_reg_float(emit, 1.0f);
9879       struct tgsi_full_src_register fragcoord =
9880          make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
9881 
9882       /* save the input index */
9883       unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
9884       /* set to invalid to prevent substitution in emit_src_register() */
9885       emit->fs.fragcoord_input_index = INVALID_INDEX;
9886 
9887       /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
9888       begin_emit_instruction(emit);
9889       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9890       emit_dst_register(emit, &tmp_dst_xyz);
9891       emit_src_register(emit, &fragcoord);
9892       end_emit_instruction(emit);
9893 
9894       /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
9895       begin_emit_instruction(emit);
9896       emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
9897       emit_dst_register(emit, &tmp_dst_w);
9898       emit_src_register(emit, &one);
9899       emit_src_register(emit, &fragcoord);
9900       end_emit_instruction(emit);
9901 
9902       /* restore saved value */
9903       emit->fs.fragcoord_input_index = fragcoord_input_index;
9904    }
9905 }
9906 
9907 
9908 /**
9909  * Emit the extra code to get the current sample position value and
9910  * put it into a temp register.
9911  */
9912 static void
emit_sample_position_instructions(struct svga_shader_emitter_v10 * emit)9913 emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
9914 {
9915    assert(emit->unit == PIPE_SHADER_FRAGMENT);
9916 
9917    if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
9918       assert(emit->version >= 41);
9919 
9920       struct tgsi_full_dst_register tmp_dst =
9921          make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
9922       struct tgsi_full_src_register half =
9923          make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
9924 
9925       struct tgsi_full_src_register tmp_src =
9926          make_src_temp_reg(emit->fs.sample_pos_tmp_index);
9927       struct tgsi_full_src_register sample_index_reg =
9928          make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
9929                              emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
9930 
9931       /* The first src register is a shader resource (if we want a
9932        * multisampled resource sample position) or the rasterizer register
9933        * (if we want the current sample position in the color buffer).  We
9934        * want the later.
9935        */
9936 
9937       /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
9938       begin_emit_instruction(emit);
9939       emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE);
9940       emit_dst_register(emit, &tmp_dst);
9941       emit_rasterizer_register(emit);
9942       emit_src_register(emit, &sample_index_reg);
9943       end_emit_instruction(emit);
9944 
9945       /* Convert from D3D coords to GL coords by adding 0.5 bias */
9946       /* ADD dst, dst, half */
9947       begin_emit_instruction(emit);
9948       emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE);
9949       emit_dst_register(emit, &tmp_dst);
9950       emit_src_register(emit, &tmp_src);
9951       emit_src_register(emit, &half);
9952       end_emit_instruction(emit);
9953    }
9954 }
9955 
9956 
9957 /**
9958  * Emit extra instructions to adjust VS inputs/attributes.  This can
9959  * mean casting a vertex attribute from int to float or setting the
9960  * W component to 1, or both.
9961  */
9962 static void
emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 * emit)9963 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
9964 {
9965    const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
9966    const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
9967    const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
9968    const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
9969    const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
9970    const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
9971    const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
9972 
9973    unsigned adjust_mask = (save_w_1_mask |
9974                            save_itof_mask |
9975                            save_utof_mask |
9976                            save_is_bgra_mask |
9977                            save_puint_to_snorm_mask |
9978                            save_puint_to_uscaled_mask |
9979                            save_puint_to_sscaled_mask);
9980 
9981    assert(emit->unit == PIPE_SHADER_VERTEX);
9982 
9983    if (adjust_mask) {
9984       struct tgsi_full_src_register one =
9985          make_immediate_reg_float(emit, 1.0f);
9986 
9987       struct tgsi_full_src_register one_int =
9988          make_immediate_reg_int(emit, 1);
9989 
9990       /* We need to turn off these bitmasks while emitting the
9991        * instructions below, then restore them afterward.
9992        */
9993       emit->key.vs.adjust_attrib_w_1 = 0;
9994       emit->key.vs.adjust_attrib_itof = 0;
9995       emit->key.vs.adjust_attrib_utof = 0;
9996       emit->key.vs.attrib_is_bgra = 0;
9997       emit->key.vs.attrib_puint_to_snorm = 0;
9998       emit->key.vs.attrib_puint_to_uscaled = 0;
9999       emit->key.vs.attrib_puint_to_sscaled = 0;
10000 
10001       while (adjust_mask) {
10002          unsigned index = u_bit_scan(&adjust_mask);
10003 
10004          /* skip the instruction if this vertex attribute is not being used */
10005          if (emit->info.input_usage_mask[index] == 0)
10006             continue;
10007 
10008          unsigned tmp = emit->vs.adjusted_input[index];
10009          struct tgsi_full_src_register input_src =
10010             make_src_reg(TGSI_FILE_INPUT, index);
10011 
10012          struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10013          struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10014          struct tgsi_full_dst_register tmp_dst_w =
10015             writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
10016 
10017          /* ITOF/UTOF/MOV tmp, input[index] */
10018          if (save_itof_mask & (1 << index)) {
10019             emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
10020                                  &tmp_dst, &input_src);
10021          }
10022          else if (save_utof_mask & (1 << index)) {
10023             emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
10024                                  &tmp_dst, &input_src);
10025          }
10026          else if (save_puint_to_snorm_mask & (1 << index)) {
10027             emit_puint_to_snorm(emit, &tmp_dst, &input_src);
10028          }
10029          else if (save_puint_to_uscaled_mask & (1 << index)) {
10030             emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
10031          }
10032          else if (save_puint_to_sscaled_mask & (1 << index)) {
10033             emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
10034          }
10035          else {
10036             assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
10037             emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
10038                                  &tmp_dst, &input_src);
10039          }
10040 
10041          if (save_is_bgra_mask & (1 << index)) {
10042             emit_swap_r_b(emit, &tmp_dst, &tmp_src);
10043          }
10044 
10045          if (save_w_1_mask & (1 << index)) {
10046             /* MOV tmp.w, 1.0 */
10047             if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
10048                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
10049                                     &tmp_dst_w, &one_int);
10050             }
10051             else {
10052                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
10053                                     &tmp_dst_w, &one);
10054             }
10055          }
10056       }
10057 
10058       emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
10059       emit->key.vs.adjust_attrib_itof = save_itof_mask;
10060       emit->key.vs.adjust_attrib_utof = save_utof_mask;
10061       emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
10062       emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
10063       emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
10064       emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
10065    }
10066 }
10067 
10068 
10069 /* Find zero-value immedate for default layer index */
10070 static void
emit_default_layer_instructions(struct svga_shader_emitter_v10 * emit)10071 emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
10072 {
10073    assert(emit->unit == PIPE_SHADER_FRAGMENT);
10074 
10075    /* immediate for default layer index 0 */
10076    if (emit->fs.layer_input_index != INVALID_INDEX) {
10077       union tgsi_immediate_data imm;
10078       imm.Int = 0;
10079       emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
10080    }
10081 }
10082 
10083 
10084 static void
emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned cbuf_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate)10085 emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10086                              unsigned cbuf_index,
10087                              struct tgsi_full_dst_register *scale,
10088                              struct tgsi_full_dst_register *translate)
10089 {
10090    struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
10091    struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
10092 
10093    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
10094    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
10095 }
10096 
10097 
10098 /**
10099  * A recursive helper function to find the prescale from the constant buffer
10100  */
10101 static void
find_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned num_prescale,struct tgsi_full_src_register * vp_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate,struct tgsi_full_src_register * tmp_src,struct tgsi_full_dst_register * tmp_dst)10102 find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10103                         unsigned index, unsigned num_prescale,
10104                         struct tgsi_full_src_register *vp_index,
10105                         struct tgsi_full_dst_register *scale,
10106                         struct tgsi_full_dst_register *translate,
10107                         struct tgsi_full_src_register *tmp_src,
10108                         struct tgsi_full_dst_register *tmp_dst)
10109 {
10110    if (num_prescale == 0)
10111       return;
10112 
10113    if (index > 0) {
10114       /* ELSE */
10115       emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
10116    }
10117 
10118    struct tgsi_full_src_register index_src =
10119 	                            make_immediate_reg_int(emit, index);
10120 
10121    if (index == 0) {
10122       /* GE tmp, vp_index, index */
10123       emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
10124                            vp_index, &index_src);
10125    } else {
10126       /* EQ tmp, vp_index, index */
10127       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
10128                            vp_index, &index_src);
10129    }
10130 
10131    /* IF tmp */
10132    emit_if(emit, tmp_src);
10133    emit_temp_prescale_from_cbuf(emit,
10134                                 emit->vposition.prescale_cbuf_index + 2 * index,
10135                                 scale, translate);
10136 
10137    find_prescale_from_cbuf(emit, index+1, num_prescale-1,
10138                            vp_index, scale, translate,
10139                            tmp_src, tmp_dst);
10140 
10141    /* ENDIF */
10142    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
10143 }
10144 
10145 
10146 /**
10147  * This helper function emits instructions to set the prescale
10148  * and translate temporaries to the correct constants from the
10149  * constant buffer according to the designated viewport.
10150  */
10151 static void
emit_temp_prescale_instructions(struct svga_shader_emitter_v10 * emit)10152 emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
10153 {
10154    struct tgsi_full_dst_register prescale_scale =
10155          make_dst_temp_reg(emit->vposition.prescale_scale_index);
10156    struct tgsi_full_dst_register prescale_translate =
10157          make_dst_temp_reg(emit->vposition.prescale_trans_index);
10158 
10159    unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
10160 
10161    if (emit->vposition.num_prescale == 1) {
10162       emit_temp_prescale_from_cbuf(emit,
10163                                    prescale_cbuf_index,
10164                                    &prescale_scale, &prescale_translate);
10165    } else {
10166       /**
10167        * Since SM5 device does not support dynamic indexing, we need
10168        * to do the if-else to find the prescale constants for the
10169        * specified viewport.
10170        */
10171       struct tgsi_full_src_register vp_index_src =
10172          make_src_temp_reg(emit->gs.viewport_index_tmp_index);
10173 
10174       struct tgsi_full_src_register vp_index_src_x =
10175          scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
10176 
10177       unsigned tmp = get_temp_index(emit);
10178       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10179       struct tgsi_full_src_register tmp_src_x =
10180                 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10181       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10182 
10183       find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
10184                               &vp_index_src_x,
10185 		              &prescale_scale, &prescale_translate,
10186                               &tmp_src_x, &tmp_dst);
10187    }
10188 
10189    /* Mark prescale temporaries are emitted */
10190    emit->vposition.have_prescale = 1;
10191 }
10192 
10193 
10194 /**
10195  * A helper function to emit an instruction in a vertex shader to add a bias
10196  * to the VertexID system value. This patches the VertexID in the SVGA vertex
10197  * shader to include the base vertex of an indexed primitive or the start index
10198  * of a non-indexed primitive.
10199  */
10200 static void
emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 * emit)10201 emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
10202 {
10203    struct tgsi_full_src_register vertex_id_bias_index =
10204       make_src_const_reg(emit->vs.vertex_id_bias_index);
10205    struct tgsi_full_src_register vertex_id_sys_src =
10206       make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
10207    struct tgsi_full_src_register vertex_id_sys_src_x =
10208       scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
10209    struct tgsi_full_dst_register vertex_id_tmp_dst =
10210       make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
10211 
10212    /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
10213    unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
10214    emit->vs.vertex_id_tmp_index = INVALID_INDEX;
10215    emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
10216                         &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE,
10217                         FALSE);
10218    emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
10219 }
10220 
10221 /**
10222  * Hull Shader must have control point outputs. But tessellation
10223  * control shader can return without writing to control point output.
10224  * In this case, the control point output is assumed to be passthrough
10225  * from the control point input.
10226  * This helper function is to write out a control point output first in case
10227  * the tessellation control shader returns before writing a
10228  * control point output.
10229  */
10230 static void
emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 * emit)10231 emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
10232 {
10233    assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10234    assert(emit->tcs.control_point_phase);
10235    assert(emit->tcs.control_point_input_index != INVALID_INDEX);
10236    assert(emit->tcs.control_point_out_index != INVALID_INDEX);
10237    assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
10238 
10239    /* UARL ADDR[INDEX].x INVOCATION.xxxx */
10240 
10241    struct tgsi_full_src_register invocation_src;
10242    struct tgsi_full_dst_register addr_dst;
10243    struct tgsi_full_dst_register addr_dst_x;
10244    unsigned addr_tmp;
10245 
10246    addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
10247    addr_dst = make_dst_temp_reg(addr_tmp);
10248    addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
10249 
10250    invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
10251                                  emit->tcs.invocation_id_sys_index);
10252 
10253    begin_emit_instruction(emit);
10254    emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10255    emit_dst_register(emit, &addr_dst_x);
10256    emit_src_register(emit, &invocation_src);
10257    end_emit_instruction(emit);
10258 
10259 
10260    /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
10261 
10262    struct tgsi_full_src_register input_control_point;
10263    struct tgsi_full_dst_register output_control_point;
10264 
10265    input_control_point = make_src_reg(TGSI_FILE_INPUT,
10266                                       emit->tcs.control_point_input_index);
10267    input_control_point.Register.Dimension = 1;
10268    input_control_point.Dimension.Indirect = 1;
10269    input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
10270    input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index;
10271    output_control_point =
10272       make_dst_output_reg(emit->tcs.control_point_out_index);
10273 
10274    begin_emit_instruction(emit);
10275    emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10276    emit_dst_register(emit, &output_control_point);
10277    emit_src_register(emit, &input_control_point);
10278    end_emit_instruction(emit);
10279 }
10280 
10281 /**
10282  * This functions constructs temporary tessfactor from VGPU10*_TESSFACTOR
10283  * values in domain shader. SM5 has tessfactors as floating point values where
10284  * as tgsi emit them as vector. This function allows to construct temp
10285  * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with
10286  * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever
10287  * TGSI_SEMANTIC_TESSINNER/OUTER is used in shader.
10288  */
10289 static void
emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 * emit)10290 emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
10291 {
10292    struct tgsi_full_src_register src;
10293    struct tgsi_full_dst_register dst;
10294 
10295    if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
10296       dst = make_dst_temp_reg(emit->tes.inner.temp_index);
10297 
10298       switch (emit->tes.prim_mode) {
10299       case PIPE_PRIM_QUADS:
10300          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10301                   emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
10302          dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10303          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10304          FALLTHROUGH;
10305       case PIPE_PRIM_TRIANGLES:
10306          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10307                   emit->tes.inner.in_index, TGSI_SWIZZLE_X);
10308          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10309          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10310          break;
10311       case PIPE_PRIM_LINES:
10312          /**
10313           * As per SM5 spec, InsideTessFactor for isolines are unused.
10314           * In fact glsl tessInnerLevel for isolines doesn't mean anything but if
10315           * any application try to read tessInnerLevel in TES when primitive type
10316           * is isolines, then instead of driver throwing segfault for accesing it,
10317           * return atleast vec(1.0f)
10318           */
10319          src = make_immediate_reg_float(emit, 1.0f);
10320          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10321          break;
10322       default:
10323          break;
10324       }
10325    }
10326 
10327    if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
10328       dst = make_dst_temp_reg(emit->tes.outer.temp_index);
10329 
10330       switch (emit->tes.prim_mode) {
10331       case PIPE_PRIM_QUADS:
10332          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10333                   emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
10334          dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
10335          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10336          FALLTHROUGH;
10337       case PIPE_PRIM_TRIANGLES:
10338          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10339                   emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
10340          dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
10341          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10342          FALLTHROUGH;
10343       case PIPE_PRIM_LINES:
10344          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10345                   emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
10346          dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10347          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10348 
10349          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10350                   emit->tes.outer.in_index , TGSI_SWIZZLE_X);
10351          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10352          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10353 
10354          break;
10355       default:
10356          break;
10357       }
10358    }
10359 }
10360 
10361 
10362 static void
emit_initialize_temp_instruction(struct svga_shader_emitter_v10 * emit)10363 emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
10364 {
10365    struct tgsi_full_src_register src;
10366    struct tgsi_full_dst_register dst;
10367    unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
10368                                                  emit->initialize_temp_index);
10369    src = make_immediate_reg_float(emit, 0.0f);
10370    dst = make_dst_temp_reg(vgpu10_temp_index);
10371    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10372    emit->temp_map[emit->initialize_temp_index].initialized = TRUE;
10373    emit->initialize_temp_index = INVALID_INDEX;
10374 }
10375 
10376 
10377 /**
10378  * Emit any extra/helper declarations/code that we might need between
10379  * the declaration section and code section.
10380  */
10381 static boolean
emit_pre_helpers(struct svga_shader_emitter_v10 * emit)10382 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
10383 {
10384    /* Properties */
10385    if (emit->unit == PIPE_SHADER_GEOMETRY)
10386       emit_property_instructions(emit);
10387    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10388       emit_hull_shader_declarations(emit);
10389 
10390       /* Save the position of the first instruction token so that we can
10391        * do a second pass of the instructions for the patch constant phase.
10392        */
10393       emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
10394       emit->tcs.fork_phase_add_signature = FALSE;
10395 
10396       if (!emit_hull_shader_control_point_phase(emit)) {
10397          emit->skip_instruction = TRUE;
10398          return TRUE;
10399       }
10400 
10401       /* Set the current tcs phase to control point phase */
10402       emit->tcs.control_point_phase = TRUE;
10403    }
10404    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10405       emit_domain_shader_declarations(emit);
10406    }
10407 
10408    /* Declare inputs */
10409    if (!emit_input_declarations(emit))
10410       return FALSE;
10411 
10412    /* Declare outputs */
10413    if (!emit_output_declarations(emit))
10414       return FALSE;
10415 
10416    /* Declare temporary registers */
10417    emit_temporaries_declaration(emit);
10418 
10419    /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
10420     * will already be declared in hs_decls (emit_hull_shader_declarations)
10421     */
10422    if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10423       /* Declare constant registers */
10424       emit_constant_declaration(emit);
10425 
10426       /* Declare samplers and resources */
10427       emit_sampler_declarations(emit);
10428       emit_resource_declarations(emit);
10429 
10430       alloc_common_immediates(emit);
10431       /* Now, emit the constant block containing all the immediates
10432        * declared by shader, as well as the extra ones seen above.
10433        */
10434    }
10435 
10436    if (emit->unit != PIPE_SHADER_FRAGMENT) {
10437       /*
10438        * Declare clip distance output registers for ClipVertex or
10439        * user defined planes
10440        */
10441       emit_clip_distance_declarations(emit);
10442    }
10443 
10444    if (emit->unit == PIPE_SHADER_FRAGMENT &&
10445        emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10446       float alpha = emit->key.fs.alpha_ref;
10447       emit->fs.alpha_ref_index =
10448          alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
10449    }
10450 
10451    if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10452       /**
10453        * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
10454        * hs_decls
10455        */
10456       emit_vgpu10_immediates_block(emit);
10457    }
10458    else {
10459       emit_tcs_default_control_point_output(emit);
10460    }
10461 
10462    if (emit->unit == PIPE_SHADER_FRAGMENT) {
10463       emit_frontface_instructions(emit);
10464       emit_fragcoord_instructions(emit);
10465       emit_sample_position_instructions(emit);
10466       emit_default_layer_instructions(emit);
10467    }
10468    else if (emit->unit == PIPE_SHADER_VERTEX) {
10469       emit_vertex_attrib_instructions(emit);
10470 
10471       if (emit->info.uses_vertexid)
10472          emit_vertex_id_nobase_instruction(emit);
10473    }
10474    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10475       emit_temp_tessfactor_instructions(emit);
10476    }
10477 
10478    /**
10479     * For geometry shader that writes to viewport index, the prescale
10480     * temporaries will be done at the first vertex emission.
10481     */
10482    if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
10483       emit_temp_prescale_instructions(emit);
10484 
10485    return TRUE;
10486 }
10487 
10488 
10489 /**
10490  * The device has no direct support for the pipe_blend_state::alpha_to_one
10491  * option so we implement it here with shader code.
10492  *
10493  * Note that this is kind of pointless, actually.  Here we're clobbering
10494  * the alpha value with 1.0.  So if alpha-to-coverage is enabled, we'll wind
10495  * up with 100% coverage.  That's almost certainly not what the user wants.
10496  * The work-around is to add extra shader code to compute coverage from alpha
10497  * and write it to the coverage output register (if the user's shader doesn't
10498  * do so already).  We'll probably do that in the future.
10499  */
10500 static void
emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)10501 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
10502                                unsigned fs_color_tmp_index)
10503 {
10504    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
10505    unsigned i;
10506 
10507    /* Note: it's not 100% clear from the spec if we're supposed to clobber
10508     * the alpha for all render targets.  But that's what NVIDIA does and
10509     * that's what Piglit tests.
10510     */
10511    for (i = 0; i < emit->fs.num_color_outputs; i++) {
10512       struct tgsi_full_dst_register color_dst;
10513 
10514       if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
10515          /* write to the temp color register */
10516          color_dst = make_dst_temp_reg(fs_color_tmp_index);
10517       }
10518       else {
10519          /* write directly to the color[i] output */
10520          color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
10521       }
10522 
10523       color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
10524 
10525       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
10526    }
10527 }
10528 
10529 
10530 /**
10531  * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
10532  * against the alpha reference value and discards the fragment if the
10533  * comparison fails.
10534  */
10535 static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)10536 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
10537                              unsigned fs_color_tmp_index)
10538 {
10539    /* compare output color's alpha to alpha ref and kill */
10540    unsigned tmp = get_temp_index(emit);
10541    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10542    struct tgsi_full_src_register tmp_src_x =
10543       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10544    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10545    struct tgsi_full_src_register color_src =
10546       make_src_temp_reg(fs_color_tmp_index);
10547    struct tgsi_full_src_register color_src_w =
10548       scalar_src(&color_src, TGSI_SWIZZLE_W);
10549    struct tgsi_full_src_register ref_src =
10550       make_src_immediate_reg(emit->fs.alpha_ref_index);
10551    struct tgsi_full_dst_register color_dst =
10552       make_dst_output_reg(emit->fs.color_out_index[0]);
10553 
10554    assert(emit->unit == PIPE_SHADER_FRAGMENT);
10555 
10556    /* dst = src0 'alpha_func' src1 */
10557    emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
10558                    &color_src_w, &ref_src);
10559 
10560    /* DISCARD if dst.x == 0 */
10561    begin_emit_instruction(emit);
10562    emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
10563    emit_src_register(emit, &tmp_src_x);
10564    end_emit_instruction(emit);
10565 
10566    /* If we don't need to broadcast the color below, emit the final color here.
10567     */
10568    if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
10569       /* MOV output.color, tempcolor */
10570       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10571    }
10572 
10573    free_temp_indexes(emit);
10574 }
10575 
10576 
10577 /**
10578  * Emit instructions for writing a single color output to multiple
10579  * color buffers.
10580  * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
10581  * when key.fs.white_fragments is true).
10582  * property is set and the number of render targets is greater than one.
10583  * \param fs_color_tmp_index  index of the temp register that holds the
10584  *                            color to broadcast.
10585  */
10586 static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)10587 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
10588                                  unsigned fs_color_tmp_index)
10589 {
10590    const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
10591    unsigned i;
10592    struct tgsi_full_src_register color_src;
10593 
10594    if (emit->key.fs.white_fragments) {
10595       /* set all color outputs to white */
10596       color_src = make_immediate_reg_float(emit, 1.0f);
10597    }
10598    else {
10599       /* set all color outputs to TEMP[fs_color_tmp_index] */
10600       assert(fs_color_tmp_index != INVALID_INDEX);
10601       color_src = make_src_temp_reg(fs_color_tmp_index);
10602    }
10603 
10604    assert(emit->unit == PIPE_SHADER_FRAGMENT);
10605 
10606    for (i = 0; i < n; i++) {
10607       unsigned output_reg = emit->fs.color_out_index[i];
10608       struct tgsi_full_dst_register color_dst =
10609          make_dst_output_reg(output_reg);
10610 
10611       /* Fill in this semantic here since we'll use it later in
10612        * emit_dst_register().
10613        */
10614       emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
10615 
10616       /* MOV output.color[i], tempcolor */
10617       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10618    }
10619 }
10620 
10621 
10622 /**
10623  * Emit extra helper code after the original shader code, but before the
10624  * last END/RET instruction.
10625  * For vertex shaders this means emitting the extra code to apply the
10626  * prescale scale/translation.
10627  */
10628 static boolean
emit_post_helpers(struct svga_shader_emitter_v10 * emit)10629 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
10630 {
10631    if (emit->unit == PIPE_SHADER_VERTEX) {
10632       emit_vertex_instructions(emit);
10633    }
10634    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
10635       const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
10636 
10637       assert(!(emit->key.fs.white_fragments &&
10638                emit->key.fs.write_color0_to_n_cbufs == 0));
10639 
10640       /* We no longer want emit_dst_register() to substitute the
10641        * temporary fragment color register for the real color output.
10642        */
10643       emit->fs.color_tmp_index = INVALID_INDEX;
10644 
10645       if (emit->key.fs.alpha_to_one) {
10646          emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
10647       }
10648       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10649          emit_alpha_test_instructions(emit, fs_color_tmp_index);
10650       }
10651       if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
10652           emit->key.fs.white_fragments) {
10653          emit_broadcast_color_instructions(emit, fs_color_tmp_index);
10654       }
10655    }
10656    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10657       if (!emit->tcs.control_point_phase) {
10658          /* store the tessellation levels in the patch constant phase only */
10659          store_tesslevels(emit);
10660       }
10661       else {
10662          emit_clipping_instructions(emit);
10663       }
10664    }
10665    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10666       emit_vertex_instructions(emit);
10667    }
10668 
10669    return TRUE;
10670 }
10671 
10672 
10673 /**
10674  * Translate the TGSI tokens into VGPU10 tokens.
10675  */
10676 static boolean
emit_vgpu10_instructions(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)10677 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
10678                          const struct tgsi_token *tokens)
10679 {
10680    struct tgsi_parse_context parse;
10681    boolean ret = TRUE;
10682    boolean pre_helpers_emitted = FALSE;
10683    unsigned inst_number = 0;
10684 
10685    tgsi_parse_init(&parse, tokens);
10686 
10687    while (!tgsi_parse_end_of_tokens(&parse)) {
10688 
10689       /* Save the current tgsi token starting position */
10690       emit->cur_tgsi_token = parse.Position;
10691 
10692       tgsi_parse_token(&parse);
10693 
10694       switch (parse.FullToken.Token.Type) {
10695       case TGSI_TOKEN_TYPE_IMMEDIATE:
10696          ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
10697          if (!ret)
10698             goto done;
10699          break;
10700 
10701       case TGSI_TOKEN_TYPE_DECLARATION:
10702          ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
10703          if (!ret)
10704             goto done;
10705          break;
10706 
10707       case TGSI_TOKEN_TYPE_INSTRUCTION:
10708          if (!pre_helpers_emitted) {
10709             ret = emit_pre_helpers(emit);
10710             if (!ret)
10711                goto done;
10712             pre_helpers_emitted = TRUE;
10713          }
10714          ret = emit_vgpu10_instruction(emit, inst_number++,
10715                                        &parse.FullToken.FullInstruction);
10716 
10717          /* Usually this applies to TCS only. If shader is reading control
10718           * point outputs in control point phase, we should reemit all
10719           * instructions which are writting into control point output in
10720           * control phase to store results into temporaries.
10721           */
10722          if (emit->reemit_instruction) {
10723             assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10724             ret = emit_vgpu10_instruction(emit, inst_number,
10725                                           &parse.FullToken.FullInstruction);
10726          }
10727          else if (emit->initialize_temp_index != INVALID_INDEX) {
10728             emit_initialize_temp_instruction(emit);
10729             emit->initialize_temp_index = INVALID_INDEX;
10730             ret = emit_vgpu10_instruction(emit, inst_number - 1,
10731                                           &parse.FullToken.FullInstruction);
10732          }
10733 
10734          if (!ret)
10735             goto done;
10736          break;
10737 
10738       case TGSI_TOKEN_TYPE_PROPERTY:
10739          ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
10740          if (!ret)
10741             goto done;
10742          break;
10743 
10744       default:
10745          break;
10746       }
10747    }
10748 
10749    if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10750       ret = emit_hull_shader_patch_constant_phase(emit, &parse);
10751    }
10752 
10753 done:
10754    tgsi_parse_free(&parse);
10755    return ret;
10756 }
10757 
10758 
10759 /**
10760  * Emit the first VGPU10 shader tokens.
10761  */
10762 static boolean
emit_vgpu10_header(struct svga_shader_emitter_v10 * emit)10763 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
10764 {
10765    VGPU10ProgramToken ptoken;
10766 
10767    /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
10768    ptoken.value = 0; /* init whole token to zero */
10769    ptoken.majorVersion = emit->version / 10;
10770    ptoken.minorVersion = emit->version % 10;
10771    ptoken.programType = translate_shader_type(emit->unit);
10772    if (!emit_dword(emit, ptoken.value))
10773       return FALSE;
10774 
10775    /* Second token: total length of shader, in tokens.  We can't fill this
10776     * in until we're all done.  Emit zero for now.
10777     */
10778    if (!emit_dword(emit, 0))
10779       return FALSE;
10780 
10781    if (emit->version >= 50) {
10782       VGPU10OpcodeToken0 token;
10783 
10784       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10785          /* For hull shader, we need to start the declarations phase first before
10786           * emitting any declarations including the global flags.
10787           */
10788          token.value = 0;
10789          token.opcodeType = VGPU10_OPCODE_HS_DECLS;
10790          begin_emit_instruction(emit);
10791          emit_dword(emit, token.value);
10792          end_emit_instruction(emit);
10793       }
10794 
10795       /* Emit global flags */
10796       token.value = 0;    /* init whole token to zero */
10797       token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10798       token.enableDoublePrecisionFloatOps = 1;  /* set bit */
10799       token.instructionLength = 1;
10800       if (!emit_dword(emit, token.value))
10801          return FALSE;
10802    }
10803 
10804    if (emit->version >= 40) {
10805       VGPU10OpcodeToken0 token;
10806 
10807       /* Reserved for global flag such as refactoringAllowed.
10808        * If the shader does not use the precise qualifier, we will set the
10809        * refactoringAllowed global flag; otherwise, we will leave the reserved
10810        * token to NOP.
10811        */
10812       emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
10813       token.value = 0;
10814       token.opcodeType = VGPU10_OPCODE_NOP;
10815       token.instructionLength = 1;
10816       if (!emit_dword(emit, token.value))
10817          return FALSE;
10818    }
10819 
10820    return TRUE;
10821 }
10822 
10823 
10824 static boolean
emit_vgpu10_tail(struct svga_shader_emitter_v10 * emit)10825 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
10826 {
10827    VGPU10ProgramToken *tokens;
10828 
10829    /* Replace the second token with total shader length */
10830    tokens = (VGPU10ProgramToken *) emit->buf;
10831    tokens[1].value = emit_get_num_tokens(emit);
10832 
10833    if (emit->version >= 40 && !emit->uses_precise_qualifier) {
10834       /* Replace the reserved token with the RefactoringAllowed global flag */
10835       VGPU10OpcodeToken0 *ptoken;
10836 
10837       ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
10838       assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
10839       ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10840       ptoken->refactoringAllowed = 1;
10841    }
10842 
10843    return TRUE;
10844 }
10845 
10846 
/**
 * Modify the FS so that it reads the BCOLORs and uses the FACE register
 * to choose between the front and back colors.
 *
 * \return the token stream produced by tgsi_add_two_side()
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   const struct tgsi_token *two_sided;

   /* Debug dump of the incoming shader, compiled out by default */
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens,0);
   }

   two_sided = tgsi_add_two_side(tokens);

   /* Debug dump of the transformed shader, compiled out by default */
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(two_sided, 0);
   }

   return two_sided;
}
10865 
10866 
10867 /**
10868  * Modify the FS to do polygon stipple.
10869  */
10870 static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)10871 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
10872                       const struct tgsi_token *tokens)
10873 {
10874    const struct tgsi_token *new_tokens;
10875    unsigned unit;
10876 
10877    if (0) {
10878       debug_printf("Before pstipple ------------------\n");
10879       tgsi_dump(tokens,0);
10880    }
10881 
10882    new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
10883                                                      TGSI_FILE_INPUT);
10884 
10885    emit->fs.pstipple_sampler_unit = unit;
10886 
10887    /* Setup texture state for stipple */
10888    emit->sampler_target[unit] = TGSI_TEXTURE_2D;
10889    emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
10890    emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
10891    emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
10892    emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
10893    emit->key.tex[unit].target = PIPE_TEXTURE_2D;
10894 
10895    if (0) {
10896       debug_printf("After pstipple ------------------\n");
10897       tgsi_dump(new_tokens, 0);
10898    }
10899 
10900    return new_tokens;
10901 }
10902 
/**
 * Modify the FS to support anti-aliased points.
 *
 * \param aa_coord_index  passed through to tgsi_add_aa_point()
 * \return the token stream produced by tgsi_add_aa_point()
 */
static const struct tgsi_token *
transform_fs_aapoint(const struct tgsi_token *tokens,
                     int aa_coord_index)
{
   const struct tgsi_token *aa_tokens;

   /* Debug dump of the incoming shader, compiled out by default */
   if (0) {
      debug_printf("Before tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens,0);
   }

   aa_tokens = tgsi_add_aa_point(tokens, aa_coord_index);

   /* Debug dump of the transformed shader, compiled out by default */
   if (0) {
      debug_printf("After tgsi_add_aa_point ------------------\n");
      tgsi_dump(aa_tokens, 0);
   }

   return aa_tokens;
}
10921 
10922 
10923 /**
10924  * A helper function to determine the shader in the previous stage and
10925  * then call the linker function to determine the input mapping for this
10926  * shader to match the output indices from the shader in the previous stage.
10927  */
10928 static void
compute_input_mapping(struct svga_context * svga,struct svga_shader_emitter_v10 * emit,enum pipe_shader_type unit)10929 compute_input_mapping(struct svga_context *svga,
10930                       struct svga_shader_emitter_v10 *emit,
10931                       enum pipe_shader_type unit)
10932 {
10933    struct svga_shader *prevShader = NULL;   /* shader in the previous stage */
10934 
10935    if (unit == PIPE_SHADER_FRAGMENT) {
10936       prevShader = svga->curr.gs ?
10937          &svga->curr.gs->base : (svga->curr.tes ?
10938          &svga->curr.tes->base : &svga->curr.vs->base);
10939    } else if (unit == PIPE_SHADER_GEOMETRY) {
10940       prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
10941    } else if (unit == PIPE_SHADER_TESS_EVAL) {
10942       assert(svga->curr.tcs);
10943       prevShader = &svga->curr.tcs->base;
10944    } else if (unit == PIPE_SHADER_TESS_CTRL) {
10945       assert(svga->curr.vs);
10946       prevShader = &svga->curr.vs->base;
10947    }
10948 
10949    if (prevShader != NULL) {
10950       svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage);
10951       emit->prevShaderInfo = &prevShader->info;
10952    }
10953    else {
10954       /**
10955        * Since vertex shader does not need to go through the linker to
10956        * establish the input map, we need to make sure the highest index
10957        * of input registers is set properly here.
10958        */
10959       emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
10960                                          emit->info.file_max[TGSI_FILE_INPUT]);
10961    }
10962 }
10963 
10964 
10965 /**
10966  * Copies the shader signature info to the shader variant
10967  */
10968 static void
copy_shader_signature(struct svga_shader_signature * sgn,struct svga_shader_variant * variant)10969 copy_shader_signature(struct svga_shader_signature *sgn,
10970                       struct svga_shader_variant *variant)
10971 {
10972    SVGA3dDXShaderSignatureHeader *header = &sgn->header;
10973 
10974    /* Calculate the signature length */
10975    variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
10976                            (header->numInputSignatures +
10977                             header->numOutputSignatures +
10978                             header->numPatchConstantSignatures) *
10979                            sizeof(SVGA3dDXShaderSignatureEntry);
10980 
10981    /* Allocate buffer for the signature info */
10982    variant->signature =
10983       (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
10984 
10985    char *sgnBuf = (char *)variant->signature;
10986    unsigned sgnLen;
10987 
10988    /* Copy the signature info to the shader variant structure */
10989    memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
10990    sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
10991 
10992    if (header->numInputSignatures) {
10993       sgnLen =
10994          header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10995       memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
10996       sgnBuf += sgnLen;
10997    }
10998 
10999    if (header->numOutputSignatures) {
11000       sgnLen =
11001          header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
11002       memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
11003       sgnBuf += sgnLen;
11004    }
11005 
11006    if (header->numPatchConstantSignatures) {
11007       sgnLen =
11008          header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
11009       memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
11010    }
11011 }
11012 
11013 
11014 /**
11015  * This is the main entrypoint for the TGSI -> VPGU10 translator.
11016  */
11017 struct svga_shader_variant *
svga_tgsi_vgpu10_translate(struct svga_context * svga,const struct svga_shader * shader,const struct svga_compile_key * key,enum pipe_shader_type unit)11018 svga_tgsi_vgpu10_translate(struct svga_context *svga,
11019                            const struct svga_shader *shader,
11020                            const struct svga_compile_key *key,
11021                            enum pipe_shader_type unit)
11022 {
11023    struct svga_shader_variant *variant = NULL;
11024    struct svga_shader_emitter_v10 *emit;
11025    const struct tgsi_token *tokens = shader->tokens;
11026 
11027    (void) make_immediate_reg_double;   /* unused at this time */
11028 
11029    assert(unit == PIPE_SHADER_VERTEX ||
11030           unit == PIPE_SHADER_GEOMETRY ||
11031           unit == PIPE_SHADER_FRAGMENT ||
11032           unit == PIPE_SHADER_TESS_CTRL ||
11033           unit == PIPE_SHADER_TESS_EVAL ||
11034           unit == PIPE_SHADER_COMPUTE);
11035 
11036    /* These two flags cannot be used together */
11037    assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
11038 
11039    SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
11040    /*
11041     * Setup the code emitter
11042     */
11043    emit = alloc_emitter();
11044    if (!emit)
11045       goto done;
11046 
11047    emit->unit = unit;
11048    if (svga_have_sm5(svga)) {
11049       emit->version = 50;
11050    } else if (svga_have_sm4_1(svga)) {
11051       emit->version = 41;
11052    } else {
11053       emit->version = 40;
11054    }
11055 
11056    emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
11057 
11058    emit->key = *key;
11059 
11060    emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
11061                                     emit->key.gs.need_prescale ||
11062                                     emit->key.tes.need_prescale);
11063 
11064    /* Determine how many prescale factors in the constant buffer */
11065    emit->vposition.num_prescale = 1;
11066    if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
11067       assert(emit->unit == PIPE_SHADER_GEOMETRY);
11068       emit->vposition.num_prescale = emit->key.gs.num_prescale;
11069    }
11070 
11071    emit->vposition.tmp_index = INVALID_INDEX;
11072    emit->vposition.so_index = INVALID_INDEX;
11073    emit->vposition.out_index = INVALID_INDEX;
11074 
11075    emit->vs.vertex_id_sys_index = INVALID_INDEX;
11076    emit->vs.vertex_id_tmp_index = INVALID_INDEX;
11077    emit->vs.vertex_id_bias_index = INVALID_INDEX;
11078 
11079    emit->fs.color_tmp_index = INVALID_INDEX;
11080    emit->fs.face_input_index = INVALID_INDEX;
11081    emit->fs.fragcoord_input_index = INVALID_INDEX;
11082    emit->fs.sample_id_sys_index = INVALID_INDEX;
11083    emit->fs.sample_pos_sys_index = INVALID_INDEX;
11084    emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
11085    emit->fs.layer_input_index = INVALID_INDEX;
11086    emit->fs.layer_imm_index = INVALID_INDEX;
11087 
11088    emit->gs.prim_id_index = INVALID_INDEX;
11089    emit->gs.invocation_id_sys_index = INVALID_INDEX;
11090    emit->gs.viewport_index_out_index = INVALID_INDEX;
11091    emit->gs.viewport_index_tmp_index = INVALID_INDEX;
11092 
11093    emit->tcs.vertices_per_patch_index = INVALID_INDEX;
11094    emit->tcs.invocation_id_sys_index = INVALID_INDEX;
11095    emit->tcs.control_point_input_index = INVALID_INDEX;
11096    emit->tcs.control_point_addr_index = INVALID_INDEX;
11097    emit->tcs.control_point_out_index = INVALID_INDEX;
11098    emit->tcs.control_point_tmp_index = INVALID_INDEX;
11099    emit->tcs.control_point_out_count = 0;
11100    emit->tcs.inner.out_index = INVALID_INDEX;
11101    emit->tcs.inner.out_index = INVALID_INDEX;
11102    emit->tcs.inner.temp_index = INVALID_INDEX;
11103    emit->tcs.inner.tgsi_index = INVALID_INDEX;
11104    emit->tcs.outer.out_index = INVALID_INDEX;
11105    emit->tcs.outer.temp_index = INVALID_INDEX;
11106    emit->tcs.outer.tgsi_index = INVALID_INDEX;
11107    emit->tcs.patch_generic_out_count = 0;
11108    emit->tcs.patch_generic_out_index = INVALID_INDEX;
11109    emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
11110    emit->tcs.prim_id_index = INVALID_INDEX;
11111 
11112    emit->tes.tesscoord_sys_index = INVALID_INDEX;
11113    emit->tes.inner.in_index = INVALID_INDEX;
11114    emit->tes.inner.temp_index = INVALID_INDEX;
11115    emit->tes.inner.tgsi_index = INVALID_INDEX;
11116    emit->tes.outer.in_index = INVALID_INDEX;
11117    emit->tes.outer.temp_index = INVALID_INDEX;
11118    emit->tes.outer.tgsi_index = INVALID_INDEX;
11119    emit->tes.prim_id_index = INVALID_INDEX;
11120 
11121    emit->clip_dist_out_index = INVALID_INDEX;
11122    emit->clip_dist_tmp_index = INVALID_INDEX;
11123    emit->clip_dist_so_index = INVALID_INDEX;
11124    emit->clip_vertex_out_index = INVALID_INDEX;
11125    emit->clip_vertex_tmp_index = INVALID_INDEX;
11126    emit->svga_debug_callback = svga->debug.callback;
11127 
11128    emit->index_range.start_index = INVALID_INDEX;
11129    emit->index_range.count = 0;
11130    emit->index_range.required = FALSE;
11131    emit->index_range.operandType = VGPU10_NUM_OPERANDS;
11132    emit->index_range.dim = 0;
11133    emit->index_range.size = 0;
11134 
11135    emit->current_loop_depth = 0;
11136 
11137    emit->initialize_temp_index = INVALID_INDEX;
11138 
11139    if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
11140       emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
11141    }
11142 
11143    if (unit == PIPE_SHADER_FRAGMENT) {
11144       if (key->fs.light_twoside) {
11145          tokens = transform_fs_twoside(tokens);
11146       }
11147       if (key->fs.pstipple) {
11148          const struct tgsi_token *new_tokens =
11149             transform_fs_pstipple(emit, tokens);
11150          if (tokens != shader->tokens) {
11151             /* free the two-sided shader tokens */
11152             tgsi_free_tokens(tokens);
11153          }
11154          tokens = new_tokens;
11155       }
11156       if (key->fs.aa_point) {
11157          tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
11158       }
11159    }
11160 
11161    if (SVGA_DEBUG & DEBUG_TGSI) {
11162       debug_printf("#####################################\n");
11163       debug_printf("### TGSI Shader %u\n", shader->id);
11164       tgsi_dump(tokens, 0);
11165    }
11166 
11167    /**
11168     * Rescan the header if the token string is different from the one
11169     * included in the shader; otherwise, the header info is already up-to-date
11170     */
11171    if (tokens != shader->tokens) {
11172       tgsi_scan_shader(tokens, &emit->info);
11173    } else {
11174       emit->info = shader->info;
11175    }
11176 
11177    emit->num_outputs = emit->info.num_outputs;
11178 
11179    /**
11180     * Compute input mapping to match the outputs from shader
11181     * in the previous stage
11182     */
11183    compute_input_mapping(svga, emit, unit);
11184 
11185    determine_clipping_mode(emit);
11186 
11187    if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
11188        unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
11189       if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
11190          /* if there is stream output declarations associated
11191           * with this shader or the shader writes to ClipDistance
11192           * then reserve extra registers for the non-adjusted vertex position
11193           * and the ClipDistance shadow copy.
11194           */
11195          emit->vposition.so_index = emit->num_outputs++;
11196 
11197          if (emit->clip_mode == CLIP_DISTANCE) {
11198             emit->clip_dist_so_index = emit->num_outputs++;
11199             if (emit->info.num_written_clipdistance > 4)
11200                emit->num_outputs++;
11201          }
11202       }
11203    }
11204 
11205    /*
11206     * Do actual shader translation.
11207     */
11208    if (!emit_vgpu10_header(emit)) {
11209       debug_printf("svga: emit VGPU10 header failed\n");
11210       goto cleanup;
11211    }
11212 
11213    if (!emit_vgpu10_instructions(emit, tokens)) {
11214       debug_printf("svga: emit VGPU10 instructions failed\n");
11215       goto cleanup;
11216    }
11217 
11218    if (!emit_vgpu10_tail(emit)) {
11219       debug_printf("svga: emit VGPU10 tail failed\n");
11220       goto cleanup;
11221    }
11222 
11223    if (emit->register_overflow) {
11224       goto cleanup;
11225    }
11226 
11227    /*
11228     * Create, initialize the 'variant' object.
11229     */
11230    variant = svga_new_shader_variant(svga, unit);
11231    if (!variant)
11232       goto cleanup;
11233 
11234    variant->shader = shader;
11235    variant->nr_tokens = emit_get_num_tokens(emit);
11236    variant->tokens = (const unsigned *)emit->buf;
11237 
11238    /* Copy shader signature info to the shader variant */
11239    if (svga_have_sm5(svga)) {
11240       copy_shader_signature(&emit->signature, variant);
11241    }
11242 
11243    emit->buf = NULL;  /* buffer is no longer owed by emitter context */
11244    memcpy(&variant->key, key, sizeof(*key));
11245    variant->id = UTIL_BITMASK_INVALID_INDEX;
11246 
11247    /* The extra constant starting offset starts with the number of
11248     * shader constants declared in the shader.
11249     */
11250    variant->extra_const_start = emit->num_shader_consts[0];
11251    if (key->gs.wide_point) {
11252       /**
11253        * The extra constant added in the transformed shader
11254        * for inverse viewport scale is to be supplied by the driver.
11255        * So the extra constant starting offset needs to be reduced by 1.
11256        */
11257       assert(variant->extra_const_start > 0);
11258       variant->extra_const_start--;
11259    }
11260 
11261    if (unit == PIPE_SHADER_FRAGMENT) {
11262       struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
11263 
11264       fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
11265 
11266       /* If there was exactly one write to a fragment shader output register
11267        * and it came from a constant buffer, we know all fragments will have
11268        * the same color (except for blending).
11269        */
11270       fs_variant->constant_color_output =
11271          emit->constant_color_output && emit->num_output_writes == 1;
11272 
11273       /** keep track in the variant if flat interpolation is used
11274        *  for any of the varyings.
11275        */
11276       fs_variant->uses_flat_interp = emit->uses_flat_interp;
11277 
11278       fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
11279    }
11280    else if (unit == PIPE_SHADER_TESS_EVAL) {
11281       struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
11282 
11283       /* Keep track in the tes variant some of the layout parameters.
11284        * These parameters will be referenced by the tcs to emit
11285        * the necessary declarations for the hull shader.
11286        */
11287       tes_variant->prim_mode = emit->tes.prim_mode;
11288       tes_variant->spacing = emit->tes.spacing;
11289       tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
11290       tes_variant->point_mode = emit->tes.point_mode;
11291    }
11292 
11293 
11294    if (tokens != shader->tokens) {
11295       tgsi_free_tokens(tokens);
11296    }
11297 
11298 cleanup:
11299    free_emitter(emit);
11300 
11301 done:
11302    SVGA_STATS_TIME_POP(svga_sws(svga));
11303    return variant;
11304 }
11305