1 /**********************************************************
2 * Copyright 1998-2013 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 /**
27 * @file svga_tgsi_vgpu10.c
28 *
29 * TGSI -> VGPU10 shader translation.
30 *
31 * \author Mingcheng Chen
32 * \author Brian Paul
33 */
34
35 #include "pipe/p_compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_build.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_scan.h"
43 #include "tgsi/tgsi_strings.h"
44 #include "tgsi/tgsi_two_side.h"
45 #include "tgsi/tgsi_aa_point.h"
46 #include "tgsi/tgsi_util.h"
47 #include "util/u_math.h"
48 #include "util/u_memory.h"
49 #include "util/u_bitmask.h"
50 #include "util/u_debug.h"
51 #include "util/u_pstipple.h"
52
53 #include "svga_context.h"
54 #include "svga_debug.h"
55 #include "svga_link.h"
56 #include "svga_shader.h"
57 #include "svga_tgsi.h"
58
59 #include "VGPU10ShaderTokens.h"
60
61
62 #define INVALID_INDEX 99999
63 #define MAX_INTERNAL_TEMPS 3
64 #define MAX_SYSTEM_VALUES 4
65 #define MAX_IMMEDIATE_COUNT \
66 (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
67 #define MAX_TEMP_ARRAYS 64 /* Enough? */
68
69
70 /**
71 * Clipping is complicated. There are four different cases which we
72 * handle during VS/GS shader translation:
73 */
74 enum clipping_mode
75 {
76 CLIP_NONE, /**< No clipping enabled */
77 CLIP_LEGACY, /**< The shader has no clipping declarations or code but
78 * one or more user-defined clip planes are enabled. We
79 * generate extra code to emit clip distances.
80 */
81 CLIP_DISTANCE, /**< The shader already declares clip distance output
82 * registers and has code to write to them.
83 */
84 CLIP_VERTEX /**< The shader declares a clip vertex output register and
85 * has code that writes to the register. We convert the
86 * clipvertex position into one or more clip distances.
87 */
88 };
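
/* For example (an illustrative reading of the cases above, in GLSL terms):
 * a vertex shader that writes gl_ClipDistance[] maps to CLIP_DISTANCE, one
 * that writes gl_ClipVertex maps to CLIP_VERTEX, a shader with no clip
 * outputs but with user clip planes enabled in the state maps to
 * CLIP_LEGACY, and otherwise we use CLIP_NONE.  See
 * determine_clipping_mode() below for the actual selection logic.
 */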
89
90
91 /* Shader signature info */
92 struct svga_shader_signature
93 {
94 SVGA3dDXShaderSignatureHeader header;
95 SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
96 SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
97 SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
98 };
99
100 static inline void
101 set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
102 unsigned index,
103 SVGA3dDXSignatureSemanticName sgnName,
104 unsigned mask,
105 SVGA3dDXSignatureRegisterComponentType compType,
106 SVGA3dDXSignatureMinPrecision minPrecision)
107 {
108 e->registerIndex = index;
109 e->semanticName = sgnName;
110 e->mask = mask;
111 e->componentType = compType;
112 e->minPrecision = minPrecision;
113 }
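
/* A minimal usage sketch (illustration only, excluded from the build):
 * filling one output-signature entry for a four-component float32 position
 * register.  The MASK_ALL, FLOAT32 and MIN_PRECISION_DEFAULT enum values
 * are assumed to come from the SVGA3D/VGPU10 headers included above.
 */
#if 0
static void
example_fill_position_signature(struct svga_shader_signature *sgn)
{
   set_shader_signature_entry(&sgn->outputs[0],
                              0, /* register index */
                              SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                              SVGADX_SIGNATURE_REGISTER_COMPONENT_FLOAT32,
                              SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
}
#endif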
114
115 static const SVGA3dDXSignatureSemanticName
116 tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
117 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
118 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
119 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
120 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
121 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
122 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
123 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
124 SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
125 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
126 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
127 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
128 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
129 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
130 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
131 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
132 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
133 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
134 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
135 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
136 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
137 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
138 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
139 SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
140 SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
141 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
142 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
143 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
144 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
145 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
146 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
147 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
148 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
149 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
150 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
151 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
152 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
153 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
154 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
155 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
156 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
157 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
158 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
159 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
160 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
161 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
162 };
163
164
165 /**
166 * Map tgsi semantic name to SVGA signature semantic name
167 */
168 static inline SVGA3dDXSignatureSemanticName
169 map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
170 {
171 assert(name < TGSI_SEMANTIC_COUNT);
172
173 /* Do a few asserts here to spot check the mapping */
174 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
175 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
176 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
177 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
178 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
179 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
180
181 return tgsi_semantic_to_sgn_name[name];
182 }
183
184
185 struct svga_shader_emitter_v10
186 {
187 /* The token output buffer */
188 unsigned size;
189 char *buf;
190 char *ptr;
191
192 /* Information about the shader and state (does not change) */
193 struct svga_compile_key key;
194 struct tgsi_shader_info info;
195 unsigned unit;
196 unsigned version; /**< Either 40 or 41 at this time */
197
198 unsigned cur_tgsi_token; /**< current tgsi token position */
199 unsigned inst_start_token;
200 boolean discard_instruction; /**< throw away current instruction? */
201 boolean reemit_instruction; /**< reemit current instruction */
202 boolean skip_instruction; /**< skip current instruction */
203
204 union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
205 double (*immediates_dbl)[2];
206 unsigned num_immediates; /**< Number of immediates emitted */
207 unsigned common_immediate_pos[10]; /**< literals for common immediates */
208 unsigned num_common_immediates;
209 boolean immediates_emitted;
210
211 unsigned num_outputs; /**< include any extra outputs */
212 /** The first extra output is reserved for
213 * non-adjusted vertex position for
214 * stream output purposes
215 */
216
217 /* Temporary Registers */
218 unsigned num_shader_temps; /**< num of temps used by original shader */
219 unsigned internal_temp_count; /**< currently allocated internal temps */
220 struct {
221 unsigned start, size;
222 } temp_arrays[MAX_TEMP_ARRAYS];
223 unsigned num_temp_arrays;
224
225 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
226 struct {
227 unsigned arrayId, index;
228 boolean initialized;
229 } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
230
231 unsigned initialize_temp_index;
232
233 /** Number of constants used by original shader for each constant buffer.
234 * The size should probably always match that of svga_state.constbufs.
235 */
236 unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
237
238 /* Samplers */
239 unsigned num_samplers;
240 boolean sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists*/
241 ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */
242 ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */
243
244 /* Index Range declaration */
245 struct {
246 unsigned start_index;
247 unsigned count;
248 boolean required;
249 unsigned operandType;
250 unsigned size;
251 unsigned dim;
252 } index_range;
253
254 /* Address regs (really implemented with temps) */
255 unsigned num_address_regs;
256 unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
257
258 /* Output register usage masks */
259 ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
260
261 /* To map TGSI system value index to VGPU shader input indexes */
262 ubyte system_value_indexes[MAX_SYSTEM_VALUES];
263
264 struct {
265 /* vertex position scale/translation */
266 unsigned out_index; /**< the real position output reg */
267 unsigned tmp_index; /**< the fake/temp position output reg */
268 unsigned so_index; /**< the non-adjusted position output reg */
269 unsigned prescale_cbuf_index; /* index to the const buf for prescale */
270 unsigned prescale_scale_index, prescale_trans_index;
271 unsigned num_prescale; /* number of prescale factors in the const buf */
272 unsigned viewport_index;
273 unsigned need_prescale:1;
274 unsigned have_prescale:1;
275 } vposition;
276
277 /* For vertex shaders only */
278 struct {
279 /* viewport constant */
280 unsigned viewport_index;
281
282 unsigned vertex_id_bias_index;
283 unsigned vertex_id_sys_index;
284 unsigned vertex_id_tmp_index;
285
286 /* temp index of adjusted vertex attributes */
287 unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
288 } vs;
289
290 /* For fragment shaders only */
291 struct {
292 unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */
293 unsigned num_color_outputs;
294 unsigned color_tmp_index; /**< fake/temp color output reg */
295 unsigned alpha_ref_index; /**< immediate constant for alpha ref */
296
297 /* front-face */
298 unsigned face_input_index; /**< real fragment shader face reg (bool) */
299 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */
300
301 unsigned pstipple_sampler_unit;
302
303 unsigned fragcoord_input_index; /**< real fragment position input reg */
304 unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */
305
306 unsigned sample_id_sys_index; /**< TGSI index of sample id sys value */
307
308 unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
309 unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
310
311 /** TGSI index of sample mask input sys value */
312 unsigned sample_mask_in_sys_index;
313
314 /** Which texture units are doing shadow comparison in the FS code */
315 unsigned shadow_compare_units;
316
317 /* layer */
318 unsigned layer_input_index; /**< TGSI index of layer */
319 unsigned layer_imm_index; /**< immediate for default layer 0 */
320 } fs;
321
322 /* For geometry shaders only */
323 struct {
324 VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
325 VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
326 unsigned input_size; /**< size of input arrays */
327 unsigned prim_id_index; /**< primitive id register index */
328 unsigned max_out_vertices; /**< maximum number of output vertices */
329 unsigned invocations;
330 unsigned invocation_id_sys_index;
331
332 unsigned viewport_index_out_index;
333 unsigned viewport_index_tmp_index;
334 } gs;
335
336 /* For tessellation control shaders only */
337 struct {
338 unsigned vertices_per_patch_index; /**< vertices_per_patch system value index */
339 unsigned imm_index; /**< immediate for tcs */
340 unsigned invocation_id_sys_index; /**< invocation id */
341 unsigned invocation_id_tmp_index;
342 unsigned instruction_token_pos; /* token pos for the first instruction */
343 unsigned control_point_input_index; /* control point input register index */
344 unsigned control_point_addr_index; /* control point input address register */
345 unsigned control_point_out_index; /* control point output register index */
346 unsigned control_point_tmp_index; /* control point temporary register */
347 unsigned control_point_out_count; /* control point output count */
348 boolean control_point_phase; /* true if in control point phase */
349 boolean fork_phase_add_signature; /* true if needs to add signature in fork phase */
350 unsigned patch_generic_out_count; /* per-patch generic output count */
351 unsigned patch_generic_out_index; /* per-patch generic output register index*/
352 unsigned patch_generic_tmp_index; /* per-patch generic temporary register index*/
353 unsigned prim_id_index; /* primitive id */
354 struct {
355 unsigned out_index; /* real tessinner output register */
356 unsigned temp_index; /* tessinner temp register */
357 unsigned tgsi_index; /* tgsi tessinner output register */
358 } inner;
359 struct {
360 unsigned out_index; /* real tessouter output register */
361 unsigned temp_index; /* tessouter temp register */
362 unsigned tgsi_index; /* tgsi tessouter output register */
363 } outer;
364 } tcs;
365
366 /* For tessellation evaluation shaders only */
367 struct {
368 enum pipe_prim_type prim_mode;
369 enum pipe_tess_spacing spacing;
370 boolean vertices_order_cw;
371 boolean point_mode;
372 unsigned tesscoord_sys_index;
373 unsigned prim_id_index; /* primitive id */
374 struct {
375 unsigned in_index; /* real tessinner input register */
376 unsigned temp_index; /* tessinner temp register */
377 unsigned tgsi_index; /* tgsi tessinner input register */
378 } inner;
379 struct {
380 unsigned in_index; /* real tessouter input register */
381 unsigned temp_index; /* tessouter temp register */
382 unsigned tgsi_index; /* tgsi tessouter input register */
383 } outer;
384 } tes;
385
386 /* For vertex or geometry shaders */
387 enum clipping_mode clip_mode;
388 unsigned clip_dist_out_index; /**< clip distance output register index */
389 unsigned clip_dist_tmp_index; /**< clip distance temporary register */
390 unsigned clip_dist_so_index; /**< clip distance shadow copy */
391
392 /** Index of temporary holding the clipvertex coordinate */
393 unsigned clip_vertex_out_index; /**< clip vertex output register index */
394 unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
395
396 /* user clip plane constant slot indexes */
397 unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
398
399 unsigned num_output_writes;
400 boolean constant_color_output;
401
402 boolean uses_flat_interp;
403
404 unsigned reserved_token; /* index to the reserved token */
405 boolean uses_precise_qualifier;
406
407 /* For all shaders: const reg index for RECT coord scaling */
408 unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
409
410 /* For all shaders: const reg index for texture buffer size */
411 unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
412
413 /* VS/TCS/TES/GS/FS Linkage info */
414 struct shader_linkage linkage;
415 struct tgsi_shader_info *prevShaderInfo;
416
417 /* Shader signature */
418 struct svga_shader_signature signature;
419
420 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */
421
422 /* For pipe_debug_message */
423 struct pipe_debug_callback svga_debug_callback;
424
425 /* current loop depth in shader */
426 unsigned current_loop_depth;
427 };
428
429
430 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
431 static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
432 static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
433 static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
434 static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
435 static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
436 static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
437 static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
438 static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
439
440 static boolean
441 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
442
443 static boolean
444 emit_vertex(struct svga_shader_emitter_v10 *emit,
445 const struct tgsi_full_instruction *inst);
446
447 static boolean
448 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
449 unsigned inst_number,
450 const struct tgsi_full_instruction *inst);
451
452 static void
453 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
454 unsigned opcodeType, unsigned operandType,
455 unsigned dim, unsigned index, unsigned size,
456 unsigned name, unsigned numComp,
457 unsigned selMode, unsigned usageMask,
458 unsigned interpMode,
459 boolean addSignature,
460 SVGA3dDXSignatureSemanticName sgnName);
461
462 static void
463 create_temp_array(struct svga_shader_emitter_v10 *emit,
464 unsigned arrayID, unsigned first, unsigned count,
465 unsigned startIndex);
466
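/* Fallback scratch buffer: if growing the real token buffer fails, expand()
 * points the emitter at err_buf so subsequent emit_dword()/emit_dwords()
 * calls still have somewhere to write, and the failure is reported through
 * the FALSE return value rather than a crash.
 */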
467 static char err_buf[128];
468
469 static boolean
470 expand(struct svga_shader_emitter_v10 *emit)
471 {
472 char *new_buf;
473 unsigned newsize = emit->size * 2;
474
475 if (emit->buf != err_buf)
476 new_buf = REALLOC(emit->buf, emit->size, newsize);
477 else
478 new_buf = NULL;
479
480 if (!new_buf) {
481 emit->ptr = err_buf;
482 emit->buf = err_buf;
483 emit->size = sizeof(err_buf);
484 return FALSE;
485 }
486
487 emit->size = newsize;
488 emit->ptr = new_buf + (emit->ptr - emit->buf);
489 emit->buf = new_buf;
490 return TRUE;
491 }
492
493 /**
494 * Create and initialize a new svga_shader_emitter_v10 object.
495 */
496 static struct svga_shader_emitter_v10 *
497 alloc_emitter(void)
498 {
499 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
500
501 if (!emit)
502 return NULL;
503
504 /* to initialize the output buffer */
505 emit->size = 512;
506 if (!expand(emit)) {
507 FREE(emit);
508 return NULL;
509 }
510 return emit;
511 }
512
513 /**
514 * Free an svga_shader_emitter_v10 object.
515 */
516 static void
517 free_emitter(struct svga_shader_emitter_v10 *emit)
518 {
519 assert(emit);
520 FREE(emit->buf); /* will be NULL if translation succeeded */
521 FREE(emit);
522 }
523
524 static inline boolean
525 reserve(struct svga_shader_emitter_v10 *emit,
526 unsigned nr_dwords)
527 {
528 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
529 if (!expand(emit))
530 return FALSE;
531 }
532
533 return TRUE;
534 }
535
536 static boolean
537 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
538 {
539 if (!reserve(emit, 1))
540 return FALSE;
541
542 *(uint32 *)emit->ptr = dword;
543 emit->ptr += sizeof dword;
544 return TRUE;
545 }
546
547 static boolean
548 emit_dwords(struct svga_shader_emitter_v10 *emit,
549 const uint32 *dwords,
550 unsigned nr)
551 {
552 if (!reserve(emit, nr))
553 return FALSE;
554
555 memcpy(emit->ptr, dwords, nr * sizeof *dwords);
556 emit->ptr += nr * sizeof *dwords;
557 return TRUE;
558 }
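
/* A minimal sketch (illustration only, excluded from the build) of how the
 * emit helpers above are used: build an opcode token and append it to the
 * growing DWORD buffer.  Encoding a NOP as a single one-DWORD token is an
 * assumption made for this example, not a statement about the real VGPU10
 * encoding.
 */
#if 0
static boolean
example_emit_nop(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 token0;

   token0.value = 0;
   token0.opcodeType = VGPU10_OPCODE_NOP;
   token0.instructionLength = 1;  /* total instruction size in DWORDs */

   /* emit_dword() calls reserve()/expand() as needed before writing */
   return emit_dword(emit, token0.value);
}
#endif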
559
560 /** Return the number of tokens in the emitter's buffer */
561 static unsigned
562 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
563 {
564 return (emit->ptr - emit->buf) / sizeof(unsigned);
565 }
566
567
568 /**
569 * Check for register overflow. If we overflow we'll set an
570 * error flag. This function can be called for register declarations
571 * or for registers used as src/dst instruction operands.
572 * \param operandType the register type, one of VGPU10_OPERAND_TYPE_x
573 * or VGPU10_OPCODE_DCL_x
574 * \param index the register index
575 */
576 static void
577 check_register_index(struct svga_shader_emitter_v10 *emit,
578 unsigned operandType, unsigned index)
579 {
580 bool overflow_before = emit->register_overflow;
581
582 switch (operandType) {
583 case VGPU10_OPERAND_TYPE_TEMP:
584 case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
585 case VGPU10_OPCODE_DCL_TEMPS:
586 if (index >= VGPU10_MAX_TEMPS) {
587 emit->register_overflow = TRUE;
588 }
589 break;
590 case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
591 case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
592 if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
593 emit->register_overflow = TRUE;
594 }
595 break;
596 case VGPU10_OPERAND_TYPE_INPUT:
597 case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
598 case VGPU10_OPCODE_DCL_INPUT:
599 case VGPU10_OPCODE_DCL_INPUT_SGV:
600 case VGPU10_OPCODE_DCL_INPUT_SIV:
601 case VGPU10_OPCODE_DCL_INPUT_PS:
602 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
603 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
604 if ((emit->unit == PIPE_SHADER_VERTEX &&
605 index >= VGPU10_MAX_VS_INPUTS) ||
606 (emit->unit == PIPE_SHADER_GEOMETRY &&
607 index >= VGPU10_MAX_GS_INPUTS) ||
608 (emit->unit == PIPE_SHADER_FRAGMENT &&
609 index >= VGPU10_MAX_FS_INPUTS) ||
610 (emit->unit == PIPE_SHADER_TESS_CTRL &&
611 index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
612 (emit->unit == PIPE_SHADER_TESS_EVAL &&
613 index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
614 emit->register_overflow = TRUE;
615 }
616 break;
617 case VGPU10_OPERAND_TYPE_OUTPUT:
618 case VGPU10_OPCODE_DCL_OUTPUT:
619 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
620 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
621 /* Note: we are skipping two output indices in the tcs for the
622 * tessinner/outer levels. The implementation will not exceed the
623 * declared output count, but it does allow the index to go beyond
624 * VGPU11_MAX_HS_OUTPUTS.
625 * The index will never be >= VGPU11_MAX_HS_OUTPUTS + 2.
626 */
627 if ((emit->unit == PIPE_SHADER_VERTEX &&
628 index >= VGPU10_MAX_VS_OUTPUTS) ||
629 (emit->unit == PIPE_SHADER_GEOMETRY &&
630 index >= VGPU10_MAX_GS_OUTPUTS) ||
631 (emit->unit == PIPE_SHADER_FRAGMENT &&
632 index >= VGPU10_MAX_FS_OUTPUTS) ||
633 (emit->unit == PIPE_SHADER_TESS_CTRL &&
634 index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
635 (emit->unit == PIPE_SHADER_TESS_EVAL &&
636 index >= VGPU11_MAX_DS_OUTPUTS)) {
637 emit->register_overflow = TRUE;
638 }
639 break;
640 case VGPU10_OPERAND_TYPE_SAMPLER:
641 case VGPU10_OPCODE_DCL_SAMPLER:
642 if (index >= VGPU10_MAX_SAMPLERS) {
643 emit->register_overflow = TRUE;
644 }
645 break;
646 case VGPU10_OPERAND_TYPE_RESOURCE:
647 case VGPU10_OPCODE_DCL_RESOURCE:
648 if (index >= VGPU10_MAX_RESOURCES) {
649 emit->register_overflow = TRUE;
650 }
651 break;
652 case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
653 if (index >= MAX_IMMEDIATE_COUNT) {
654 emit->register_overflow = TRUE;
655 }
656 break;
657 case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
658 /* nothing */
659 break;
660 default:
661 assert(0);
662 ; /* nothing */
663 }
664
665 if (emit->register_overflow && !overflow_before) {
666 debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
667 operandType, index);
668 }
669 }
670
671
672 /**
673 * Examine misc state to determine the clipping mode.
674 */
675 static void
676 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
677 {
678 /* num_written_clipdistance in the shader info for tessellation
679 * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
680 * is not defined for this shader. So we go through all the output declarations
681 * to set the num_written_clipdistance. This is just to determine the
682 * clipping mode.
683 */
684 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
685 unsigned i;
686 for (i = 0; i < emit->info.num_outputs; i++) {
687 if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
688 emit->info.num_written_clipdistance =
689 4 * (emit->info.output_semantic_index[i] + 1);
690 }
691 }
692 }
693
694 if (emit->info.num_written_clipdistance > 0) {
695 emit->clip_mode = CLIP_DISTANCE;
696 }
697 else if (emit->info.writes_clipvertex) {
698 emit->clip_mode = CLIP_VERTEX;
699 }
700 else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
701 /*
702 * Only the last shader in the vertex processing stage needs to
703 * handle the legacy clip mode.
704 */
705 emit->clip_mode = CLIP_LEGACY;
706 }
707 else {
708 emit->clip_mode = CLIP_NONE;
709 }
710 }
711
712
713 /**
714 * For clip distance register declarations and clip distance register
715 * writes we need to mask the declaration usage or instruction writemask
716 * (respectively) against the set of the really-enabled clipping planes.
717 *
718 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
719 * has a VS that writes to all 8 clip distance registers, but the plane enable
720 * flags are a subset of that.
721 *
722 * This function is used to apply the plane enable flags to the register
723 * declaration or instruction writemask.
724 *
725 * \param writemask the declaration usage mask or instruction writemask
726 * \param clip_reg_index which clip plane register is being declared/written.
727 * The legal values are 0 and 1 (two clip planes per
728 * register, for a total of 8 clip planes)
729 */
730 static unsigned
731 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
732 unsigned writemask, unsigned clip_reg_index)
733 {
734 unsigned shift;
735
736 assert(clip_reg_index < 2);
737
738 /* four clip planes per clip register: */
739 shift = clip_reg_index * 4;
740 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
741
742 return writemask;
743 }
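
/* Worked example: with key.clip_plane_enable == 0x3f (planes 0..5 enabled)
 * and clip_reg_index == 1, the writemask is ANDed with (0x3f >> 4) & 0xf
 * == 0x3, so only the x/y components of the second clip-distance register
 * (planes 4 and 5) remain writable.
 */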
744
745
746 /**
747 * Translate gallium shader type into VGPU10 type.
748 */
749 static VGPU10_PROGRAM_TYPE
750 translate_shader_type(unsigned type)
751 {
752 switch (type) {
753 case PIPE_SHADER_VERTEX:
754 return VGPU10_VERTEX_SHADER;
755 case PIPE_SHADER_GEOMETRY:
756 return VGPU10_GEOMETRY_SHADER;
757 case PIPE_SHADER_FRAGMENT:
758 return VGPU10_PIXEL_SHADER;
759 case PIPE_SHADER_TESS_CTRL:
760 return VGPU10_HULL_SHADER;
761 case PIPE_SHADER_TESS_EVAL:
762 return VGPU10_DOMAIN_SHADER;
763 case PIPE_SHADER_COMPUTE:
764 return VGPU10_COMPUTE_SHADER;
765 default:
766 assert(!"Unexpected shader type");
767 return VGPU10_VERTEX_SHADER;
768 }
769 }
770
771
772 /**
773 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
774 * Note: we only need to translate the opcodes for "simple" instructions,
775 * as seen below. All other opcodes are handled/translated specially.
776 */
777 static VGPU10_OPCODE_TYPE
778 translate_opcode(enum tgsi_opcode opcode)
779 {
780 switch (opcode) {
781 case TGSI_OPCODE_MOV:
782 return VGPU10_OPCODE_MOV;
783 case TGSI_OPCODE_MUL:
784 return VGPU10_OPCODE_MUL;
785 case TGSI_OPCODE_ADD:
786 return VGPU10_OPCODE_ADD;
787 case TGSI_OPCODE_DP3:
788 return VGPU10_OPCODE_DP3;
789 case TGSI_OPCODE_DP4:
790 return VGPU10_OPCODE_DP4;
791 case TGSI_OPCODE_MIN:
792 return VGPU10_OPCODE_MIN;
793 case TGSI_OPCODE_MAX:
794 return VGPU10_OPCODE_MAX;
795 case TGSI_OPCODE_MAD:
796 return VGPU10_OPCODE_MAD;
797 case TGSI_OPCODE_SQRT:
798 return VGPU10_OPCODE_SQRT;
799 case TGSI_OPCODE_FRC:
800 return VGPU10_OPCODE_FRC;
801 case TGSI_OPCODE_FLR:
802 return VGPU10_OPCODE_ROUND_NI;
803 case TGSI_OPCODE_FSEQ:
804 return VGPU10_OPCODE_EQ;
805 case TGSI_OPCODE_FSGE:
806 return VGPU10_OPCODE_GE;
807 case TGSI_OPCODE_FSNE:
808 return VGPU10_OPCODE_NE;
809 case TGSI_OPCODE_DDX:
810 return VGPU10_OPCODE_DERIV_RTX;
811 case TGSI_OPCODE_DDY:
812 return VGPU10_OPCODE_DERIV_RTY;
813 case TGSI_OPCODE_RET:
814 return VGPU10_OPCODE_RET;
815 case TGSI_OPCODE_DIV:
816 return VGPU10_OPCODE_DIV;
817 case TGSI_OPCODE_IDIV:
818 return VGPU10_OPCODE_VMWARE;
819 case TGSI_OPCODE_DP2:
820 return VGPU10_OPCODE_DP2;
821 case TGSI_OPCODE_BRK:
822 return VGPU10_OPCODE_BREAK;
823 case TGSI_OPCODE_IF:
824 return VGPU10_OPCODE_IF;
825 case TGSI_OPCODE_ELSE:
826 return VGPU10_OPCODE_ELSE;
827 case TGSI_OPCODE_ENDIF:
828 return VGPU10_OPCODE_ENDIF;
829 case TGSI_OPCODE_CEIL:
830 return VGPU10_OPCODE_ROUND_PI;
831 case TGSI_OPCODE_I2F:
832 return VGPU10_OPCODE_ITOF;
833 case TGSI_OPCODE_NOT:
834 return VGPU10_OPCODE_NOT;
835 case TGSI_OPCODE_TRUNC:
836 return VGPU10_OPCODE_ROUND_Z;
837 case TGSI_OPCODE_SHL:
838 return VGPU10_OPCODE_ISHL;
839 case TGSI_OPCODE_AND:
840 return VGPU10_OPCODE_AND;
841 case TGSI_OPCODE_OR:
842 return VGPU10_OPCODE_OR;
843 case TGSI_OPCODE_XOR:
844 return VGPU10_OPCODE_XOR;
845 case TGSI_OPCODE_CONT:
846 return VGPU10_OPCODE_CONTINUE;
847 case TGSI_OPCODE_EMIT:
848 return VGPU10_OPCODE_EMIT;
849 case TGSI_OPCODE_ENDPRIM:
850 return VGPU10_OPCODE_CUT;
851 case TGSI_OPCODE_BGNLOOP:
852 return VGPU10_OPCODE_LOOP;
853 case TGSI_OPCODE_ENDLOOP:
854 return VGPU10_OPCODE_ENDLOOP;
855 case TGSI_OPCODE_ENDSUB:
856 return VGPU10_OPCODE_RET;
857 case TGSI_OPCODE_NOP:
858 return VGPU10_OPCODE_NOP;
859 case TGSI_OPCODE_END:
860 return VGPU10_OPCODE_RET;
861 case TGSI_OPCODE_F2I:
862 return VGPU10_OPCODE_FTOI;
863 case TGSI_OPCODE_IMAX:
864 return VGPU10_OPCODE_IMAX;
865 case TGSI_OPCODE_IMIN:
866 return VGPU10_OPCODE_IMIN;
867 case TGSI_OPCODE_UDIV:
868 case TGSI_OPCODE_UMOD:
869 case TGSI_OPCODE_MOD:
870 return VGPU10_OPCODE_UDIV;
871 case TGSI_OPCODE_IMUL_HI:
872 return VGPU10_OPCODE_IMUL;
873 case TGSI_OPCODE_INEG:
874 return VGPU10_OPCODE_INEG;
875 case TGSI_OPCODE_ISHR:
876 return VGPU10_OPCODE_ISHR;
877 case TGSI_OPCODE_ISGE:
878 return VGPU10_OPCODE_IGE;
879 case TGSI_OPCODE_ISLT:
880 return VGPU10_OPCODE_ILT;
881 case TGSI_OPCODE_F2U:
882 return VGPU10_OPCODE_FTOU;
883 case TGSI_OPCODE_UADD:
884 return VGPU10_OPCODE_IADD;
885 case TGSI_OPCODE_U2F:
886 return VGPU10_OPCODE_UTOF;
887 case TGSI_OPCODE_UCMP:
888 return VGPU10_OPCODE_MOVC;
889 case TGSI_OPCODE_UMAD:
890 return VGPU10_OPCODE_UMAD;
891 case TGSI_OPCODE_UMAX:
892 return VGPU10_OPCODE_UMAX;
893 case TGSI_OPCODE_UMIN:
894 return VGPU10_OPCODE_UMIN;
895 case TGSI_OPCODE_UMUL:
896 case TGSI_OPCODE_UMUL_HI:
897 return VGPU10_OPCODE_UMUL;
898 case TGSI_OPCODE_USEQ:
899 return VGPU10_OPCODE_IEQ;
900 case TGSI_OPCODE_USGE:
901 return VGPU10_OPCODE_UGE;
902 case TGSI_OPCODE_USHR:
903 return VGPU10_OPCODE_USHR;
904 case TGSI_OPCODE_USLT:
905 return VGPU10_OPCODE_ULT;
906 case TGSI_OPCODE_USNE:
907 return VGPU10_OPCODE_INE;
908 case TGSI_OPCODE_SWITCH:
909 return VGPU10_OPCODE_SWITCH;
910 case TGSI_OPCODE_CASE:
911 return VGPU10_OPCODE_CASE;
912 case TGSI_OPCODE_DEFAULT:
913 return VGPU10_OPCODE_DEFAULT;
914 case TGSI_OPCODE_ENDSWITCH:
915 return VGPU10_OPCODE_ENDSWITCH;
916 case TGSI_OPCODE_FSLT:
917 return VGPU10_OPCODE_LT;
918 case TGSI_OPCODE_ROUND:
919 return VGPU10_OPCODE_ROUND_NE;
920 /* Begin SM5 opcodes */
921 case TGSI_OPCODE_F2D:
922 return VGPU10_OPCODE_FTOD;
923 case TGSI_OPCODE_D2F:
924 return VGPU10_OPCODE_DTOF;
925 case TGSI_OPCODE_DMUL:
926 return VGPU10_OPCODE_DMUL;
927 case TGSI_OPCODE_DADD:
928 return VGPU10_OPCODE_DADD;
929 case TGSI_OPCODE_DMAX:
930 return VGPU10_OPCODE_DMAX;
931 case TGSI_OPCODE_DMIN:
932 return VGPU10_OPCODE_DMIN;
933 case TGSI_OPCODE_DSEQ:
934 return VGPU10_OPCODE_DEQ;
935 case TGSI_OPCODE_DSGE:
936 return VGPU10_OPCODE_DGE;
937 case TGSI_OPCODE_DSLT:
938 return VGPU10_OPCODE_DLT;
939 case TGSI_OPCODE_DSNE:
940 return VGPU10_OPCODE_DNE;
941 case TGSI_OPCODE_IBFE:
942 return VGPU10_OPCODE_IBFE;
943 case TGSI_OPCODE_UBFE:
944 return VGPU10_OPCODE_UBFE;
945 case TGSI_OPCODE_BFI:
946 return VGPU10_OPCODE_BFI;
947 case TGSI_OPCODE_BREV:
948 return VGPU10_OPCODE_BFREV;
949 case TGSI_OPCODE_POPC:
950 return VGPU10_OPCODE_COUNTBITS;
951 case TGSI_OPCODE_LSB:
952 return VGPU10_OPCODE_FIRSTBIT_LO;
953 case TGSI_OPCODE_IMSB:
954 return VGPU10_OPCODE_FIRSTBIT_SHI;
955 case TGSI_OPCODE_UMSB:
956 return VGPU10_OPCODE_FIRSTBIT_HI;
957 case TGSI_OPCODE_INTERP_CENTROID:
958 return VGPU10_OPCODE_EVAL_CENTROID;
959 case TGSI_OPCODE_INTERP_SAMPLE:
960 return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
961 case TGSI_OPCODE_BARRIER:
962 return VGPU10_OPCODE_SYNC;
963
964 /* DX11.1 Opcodes */
965 case TGSI_OPCODE_DDIV:
966 return VGPU10_OPCODE_DDIV;
967 case TGSI_OPCODE_DRCP:
968 return VGPU10_OPCODE_DRCP;
969 case TGSI_OPCODE_D2I:
970 return VGPU10_OPCODE_DTOI;
971 case TGSI_OPCODE_D2U:
972 return VGPU10_OPCODE_DTOU;
973 case TGSI_OPCODE_I2D:
974 return VGPU10_OPCODE_ITOD;
975 case TGSI_OPCODE_U2D:
976 return VGPU10_OPCODE_UTOD;
977
978 case TGSI_OPCODE_SAMPLE_POS:
979 /* Note: we never actually get this opcode because there's no GLSL
980 * function to query multisample resource sample positions. There's
981 * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
982 * position of the current sample in the render target.
983 */
984 FALLTHROUGH;
985 case TGSI_OPCODE_SAMPLE_INFO:
986 /* NOTE: we never actually get this opcode because the GLSL compiler
987 * implements the gl_NumSamples variable with a simple constant in the
988 * constant buffer.
989 */
990 FALLTHROUGH;
991 default:
992 assert(!"Unexpected TGSI opcode in translate_opcode()");
993 return VGPU10_OPCODE_NOP;
994 }
995 }
996
997
998 /**
999 * Translate a TGSI register file type into a VGPU10 operand type.
1000 * \param array is the TGSI_FILE_TEMPORARY register an array?
1001 */
1002 static VGPU10_OPERAND_TYPE
1003 translate_register_file(enum tgsi_file_type file, boolean array)
1004 {
1005 switch (file) {
1006 case TGSI_FILE_CONSTANT:
1007 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1008 case TGSI_FILE_INPUT:
1009 return VGPU10_OPERAND_TYPE_INPUT;
1010 case TGSI_FILE_OUTPUT:
1011 return VGPU10_OPERAND_TYPE_OUTPUT;
1012 case TGSI_FILE_TEMPORARY:
1013 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1014 : VGPU10_OPERAND_TYPE_TEMP;
1015 case TGSI_FILE_IMMEDIATE:
1016 /* all immediates are 32-bit values at this time, so
1017 * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible here.
1018 */
1019 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1020 case TGSI_FILE_SAMPLER:
1021 return VGPU10_OPERAND_TYPE_SAMPLER;
1022 case TGSI_FILE_SYSTEM_VALUE:
1023 return VGPU10_OPERAND_TYPE_INPUT;
1024
1025 /* XXX TODO more cases to finish */
1026
1027 default:
1028 assert(!"Bad tgsi register file!");
1029 return VGPU10_OPERAND_TYPE_NULL;
1030 }
1031 }
1032
1033
1034 /**
1035 * Emit a null dst register
1036 */
1037 static void
1038 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1039 {
1040 VGPU10OperandToken0 operand;
1041
1042 operand.value = 0;
1043 operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1044 operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1045
1046 emit_dword(emit, operand.value);
1047 }
1048
1049
1050 /**
1051 * If the given register is a temporary, return the array ID.
1052 * Else return zero.
1053 */
1054 static unsigned
1055 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1056 enum tgsi_file_type file, unsigned index)
1057 {
1058 if (file == TGSI_FILE_TEMPORARY) {
1059 return emit->temp_map[index].arrayId;
1060 }
1061 else {
1062 return 0;
1063 }
1064 }
1065
1066
1067 /**
1068 * If the given register is a temporary, convert the index from a TGSI
1069 * TEMPORARY index to a VGPU10 temp index.
1070 */
1071 static unsigned
1072 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1073 enum tgsi_file_type file, unsigned index)
1074 {
1075 if (file == TGSI_FILE_TEMPORARY) {
1076 return emit->temp_map[index].index;
1077 }
1078 else {
1079 return index;
1080 }
1081 }
1082
1083
1084 /**
1085 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1086 * Note: the operandType field must already be initialized.
1087 * \param file the register file being accessed
1088 * \param indirect using indirect addressing of the register file?
1089 * \param index2D if true, 2-D indexing is being used (const or temp registers)
1090 * \param indirect2D if true, 2-D indirect indexing being used (for const buf)
1091 */
1092 static VGPU10OperandToken0
1093 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1094 VGPU10OperandToken0 operand0,
1095 enum tgsi_file_type file,
1096 boolean indirect,
1097 boolean index2D, bool indirect2D)
1098 {
1099 VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1100 VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1101
1102 /*
1103 * Compute index dimensions
1104 */
1105 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1106 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1107 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1108 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1109 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1110 operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1111 /* there's no swizzle for in-line immediates */
1112 indexDim = VGPU10_OPERAND_INDEX_0D;
1113 assert(operand0.selectionMode == 0);
1114 }
1115 else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1116 indexDim = VGPU10_OPERAND_INDEX_0D;
1117 }
1118 else {
1119 indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1120 }
1121
1122 /*
1123 * Compute index representation(s) (immediate vs relative).
1124 */
1125 if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1126 index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1127 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1128
1129 index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1130 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1131 }
1132 else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1133 index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1134 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1135
1136 index1Rep = 0;
1137 }
1138 else {
1139 index0Rep = 0;
1140 index1Rep = 0;
1141 }
1142
1143 operand0.indexDimension = indexDim;
1144 operand0.index0Representation = index0Rep;
1145 operand0.index1Representation = index1Rep;
1146
1147 return operand0;
1148 }
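
/* Worked example: for a constant-buffer source such as CONST[1][3] with no
 * relative addressing, index2D is true while indirect and indirect2D are
 * false, so this sets indexDimension = VGPU10_OPERAND_INDEX_2D and both
 * index representations to VGPU10_OPERAND_INDEX_IMMEDIATE32.
 */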
1149
1150
1151 /**
1152 * Emit the operand for expressing an address register for indirect indexing.
1153 * Note that the address register is really just a temp register.
1154 * \param addr_reg_index which address register to use
1155 */
1156 static void
1157 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1158 unsigned addr_reg_index)
1159 {
1160 unsigned tmp_reg_index;
1161 VGPU10OperandToken0 operand0;
1162
1163 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1164
1165 tmp_reg_index = emit->address_reg_index[addr_reg_index];
1166
1167 /* operand0 is a simple temporary register, selecting one component */
1168 operand0.value = 0;
1169 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1170 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1171 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1172 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1173 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1174 operand0.swizzleX = 0;
1175 operand0.swizzleY = 1;
1176 operand0.swizzleZ = 2;
1177 operand0.swizzleW = 3;
1178
1179 emit_dword(emit, operand0.value);
1180 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1181 }
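
/* For example, a source like TEMP[ADDR[0].x + 4] is encoded with an
 * IMMEDIATE32_PLUS_RELATIVE index: the caller emits the base index 4,
 * followed by the two DWORDs emitted here, which select a single component
 * of the temp register backing ADDR[0].
 */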
1182
1183
1184 /**
1185 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
1186 * \param emit the emitter context
1187 * \param reg the TGSI dst register to translate
1188 */
1189 static void
1190 emit_dst_register(struct svga_shader_emitter_v10 *emit,
1191 const struct tgsi_full_dst_register *reg)
1192 {
1193 enum tgsi_file_type file = reg->Register.File;
1194 unsigned index = reg->Register.Index;
1195 const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
1196 const unsigned sem_index = emit->info.output_semantic_index[index];
1197 unsigned writemask = reg->Register.WriteMask;
1198 const boolean indirect = reg->Register.Indirect;
1199 unsigned tempArrayId = get_temp_array_id(emit, file, index);
1200 boolean index2d = reg->Register.Dimension || tempArrayId > 0;
1201 VGPU10OperandToken0 operand0;
1202
1203 if (file == TGSI_FILE_TEMPORARY) {
1204 emit->temp_map[index].initialized = TRUE;
1205 }
1206
1207 if (file == TGSI_FILE_OUTPUT) {
1208 if (emit->unit == PIPE_SHADER_VERTEX ||
1209 emit->unit == PIPE_SHADER_GEOMETRY ||
1210 emit->unit == PIPE_SHADER_TESS_EVAL) {
1211 if (index == emit->vposition.out_index &&
1212 emit->vposition.tmp_index != INVALID_INDEX) {
1213 /* replace OUTPUT[POS] with TEMP[POS]. We need to store the
1214 * vertex position result in a temporary so that we can modify
1215 * it in the post_helper() code.
1216 */
1217 file = TGSI_FILE_TEMPORARY;
1218 index = emit->vposition.tmp_index;
1219 }
1220 else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1221 emit->clip_dist_tmp_index != INVALID_INDEX) {
1222 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1223 * We store the clip distance in a temporary first, then
1224 * we'll copy it to the shadow copy and to CLIPDIST with the
1225 * enabled planes mask in emit_clip_distance_instructions().
1226 */
1227 file = TGSI_FILE_TEMPORARY;
1228 index = emit->clip_dist_tmp_index + sem_index;
1229 }
1230 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1231 emit->clip_vertex_tmp_index != INVALID_INDEX) {
1232 /* replace the CLIPVERTEX output register with a temporary */
1233 assert(emit->clip_mode == CLIP_VERTEX);
1234 assert(sem_index == 0);
1235 file = TGSI_FILE_TEMPORARY;
1236 index = emit->clip_vertex_tmp_index;
1237 }
1238 else if (sem_name == TGSI_SEMANTIC_COLOR &&
1239 emit->key.clamp_vertex_color) {
1240
1241 /* set the saturate modifier of the instruction
1242 * to clamp the vertex color.
1243 */
1244 VGPU10OpcodeToken0 *token =
1245 (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
1246 token->saturate = TRUE;
1247 }
1248 else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
1249 emit->gs.viewport_index_out_index != INVALID_INDEX) {
1250 file = TGSI_FILE_TEMPORARY;
1251 index = emit->gs.viewport_index_tmp_index;
1252 }
1253 }
1254 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
1255 if (sem_name == TGSI_SEMANTIC_POSITION) {
1256 /* Fragment depth output register */
1257 operand0.value = 0;
1258 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
1259 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1260 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1261 emit_dword(emit, operand0.value);
1262 return;
1263 }
1264 else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
1265 /* Fragment sample mask output */
1266 operand0.value = 0;
1267 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
1268 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1269 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1270 emit_dword(emit, operand0.value);
1271 return;
1272 }
1273 else if (index == emit->fs.color_out_index[0] &&
1274 emit->fs.color_tmp_index != INVALID_INDEX) {
1275 /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the
1276 * fragment color result in a temporary so that we can read it
1277 * in the post_helper() code.
1278 */
1279 file = TGSI_FILE_TEMPORARY;
1280 index = emit->fs.color_tmp_index;
1281 }
1282 else {
1283 /* Typically, for fragment shaders, the output register index
1284 * matches the color semantic index. But not when we write to
1285 * the fragment depth register. In that case, OUT[0] will be
1286 * fragdepth and OUT[1] will be the 0th color output. We need
1287 * to use the semantic index for color outputs.
1288 */
1289 assert(sem_name == TGSI_SEMANTIC_COLOR);
1290 index = emit->info.output_semantic_index[index];
1291
1292 emit->num_output_writes++;
1293 }
1294 }
1295 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1296 if (index == emit->tcs.inner.tgsi_index) {
1297 /* replace OUTPUT[TESSLEVEL] with temp. We store it in a
1298 * temporary for now so that it can be stored into the appropriate
1299 * registers in post_helper() in the patch constant phase.
1300 */
1301 if (emit->tcs.control_point_phase) {
1302 /* Discard writing into tessfactor in control point phase */
1303 emit->discard_instruction = TRUE;
1304 }
1305 else {
1306 file = TGSI_FILE_TEMPORARY;
1307 index = emit->tcs.inner.temp_index;
1308 }
1309 }
1310 else if (index == emit->tcs.outer.tgsi_index) {
1311 /* replace OUTPUT[TESSLEVEL] with temp. We store it in a
1312 * temporary for now so that it can be stored into the appropriate
1313 * registers in post_helper().
1314 */
1315 if (emit->tcs.control_point_phase) {
1316 /* Discard writing into tessfactor in control point phase */
1317 emit->discard_instruction = TRUE;
1318 }
1319 else {
1320 file = TGSI_FILE_TEMPORARY;
1321 index = emit->tcs.outer.temp_index;
1322 }
1323 }
1324 else if (index >= emit->tcs.patch_generic_out_index &&
1325 index < (emit->tcs.patch_generic_out_index +
1326 emit->tcs.patch_generic_out_count)) {
1327 if (emit->tcs.control_point_phase) {
1328 /* Discard writing into generic patch constant outputs in
1329 control point phase */
1330 emit->discard_instruction = TRUE;
1331 }
1332 else {
1333 if (emit->reemit_instruction) {
1334 /* Store results of reemitted instruction in temporary register. */
1335 file = TGSI_FILE_TEMPORARY;
1336 index = emit->tcs.patch_generic_tmp_index +
1337 (index - emit->tcs.patch_generic_out_index);
1338 /**
1339 * Temporaries for patch constant data can be done
1340 * as indexable temporaries.
1341 */
1342 tempArrayId = get_temp_array_id(emit, file, index);
1343 index2d = tempArrayId > 0;
1344
1345 emit->reemit_instruction = FALSE;
1346 }
1347 else {
1348 /* If per-patch outputs are read in the shader, we
1349 * reemit the instruction and store the results in temporaries in
1350 * the patch constant phase. */
1351 if (emit->info.reads_perpatch_outputs) {
1352 emit->reemit_instruction = TRUE;
1353 }
1354 }
1355 }
1356 }
1357 else if (reg->Register.Dimension) {
1358 /* Only control point outputs are declared 2D in tgsi */
1359 if (emit->tcs.control_point_phase) {
1360 if (emit->reemit_instruction) {
1361 /* Store results of reemitted instruction in temporary register. */
1362 index2d = FALSE;
1363 file = TGSI_FILE_TEMPORARY;
1364 index = emit->tcs.control_point_tmp_index +
1365 (index - emit->tcs.control_point_out_index);
1366 emit->reemit_instruction = FALSE;
1367 }
1368 else {
1369 /* The mapped control point outputs are 1-D */
1370 index2d = FALSE;
1371 if (emit->info.reads_pervertex_outputs) {
1372 /* If per-vertex outputs are read in the shader, we
1373 * reemit the instruction and store the results in temporaries
1374 * in the control point phase. */
1375 emit->reemit_instruction = TRUE;
1376 }
1377 }
1378
1379 if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1380 emit->clip_dist_tmp_index != INVALID_INDEX) {
1381 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1382 * We store the clip distance in a temporary first, then
1383 * we'll copy it to the shadow copy and to CLIPDIST with the
1384 * enabled planes mask in emit_clip_distance_instructions().
1385 */
1386 file = TGSI_FILE_TEMPORARY;
1387 index = emit->clip_dist_tmp_index + sem_index;
1388 }
1389 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1390 emit->clip_vertex_tmp_index != INVALID_INDEX) {
1391 /* replace the CLIPVERTEX output register with a temporary */
1392 assert(emit->clip_mode == CLIP_VERTEX);
1393 assert(sem_index == 0);
1394 file = TGSI_FILE_TEMPORARY;
1395 index = emit->clip_vertex_tmp_index;
1396 }
1397 }
1398 else {
1399 /* Discard writing into control point outputs in
1400 patch constant phase */
1401 emit->discard_instruction = TRUE;
1402 }
1403 }
1404 }
1405 }
1406
1407 /* init operand tokens to all zero */
1408 operand0.value = 0;
1409
1410 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1411
1412 /* the operand has a writemask */
1413 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1414
1415 /* Which of the four dest components to write to. Note that we can use a
1416 * simple assignment here since TGSI writemasks match VGPU10 writemasks.
1417 */
1418 STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
1419 operand0.mask = writemask;
1420
1421 /* translate TGSI register file type to VGPU10 operand type */
1422 operand0.operandType = translate_register_file(file, tempArrayId > 0);
1423
1424 check_register_index(emit, operand0.operandType, index);
1425
1426 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1427 index2d, FALSE);
1428
1429 /* Emit tokens */
1430 emit_dword(emit, operand0.value);
1431 if (tempArrayId > 0) {
1432 emit_dword(emit, tempArrayId);
1433 }
1434
1435 emit_dword(emit, remap_temp_index(emit, file, index));
1436
1437 if (indirect) {
1438 emit_indirect_register(emit, reg->Indirect.Index);
1439 }
1440 }
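
/* Example of the substitutions above: in a vertex shader, a write to the
 * position output is silently redirected to TEMP[vposition.tmp_index] so
 * the viewport prescale can be applied later, while a write to a COLOR
 * output with key.clamp_vertex_color set keeps its register but has the
 * instruction's saturate bit set so the result is clamped to [0,1].
 */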
1441
1442
1443 /**
1444 * Check if a temporary register needs to be initialized. We only do this
1445 * when the shader does not use indirect addressing for temporaries and the
1446 * uninitialized temporary is not used inside a loop; in those two scenarios
1447 * we cannot determine whether a temporary is initialized or not.
1448 */
1449 static boolean
1450 need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1451 unsigned index)
1452 {
1453 if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
1454 && emit->current_loop_depth == 0) {
1455 if (!emit->temp_map[index].initialized &&
1456 emit->temp_map[index].index < emit->num_shader_temps) {
1457 return TRUE;
1458 }
1459 }
1460
1461 return FALSE;
1462 }
1463
1464
1465 /**
1466 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1467 * In quite a few cases, we do register substitution. For example, if
1468 * the TGSI register is the front/back-face register, we replace that with
1469 * a temp register containing a value we computed earlier.
1470 */
1471 static void
1472 emit_src_register(struct svga_shader_emitter_v10 *emit,
1473 const struct tgsi_full_src_register *reg)
1474 {
1475 enum tgsi_file_type file = reg->Register.File;
1476 unsigned index = reg->Register.Index;
1477 const boolean indirect = reg->Register.Indirect;
1478 unsigned tempArrayId = get_temp_array_id(emit, file, index);
1479 boolean index2d = (reg->Register.Dimension ||
1480 tempArrayId > 0 ||
1481 file == TGSI_FILE_CONSTANT);
1482 unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1483 boolean indirect2d = reg->Dimension.Indirect;
1484 unsigned swizzleX = reg->Register.SwizzleX;
1485 unsigned swizzleY = reg->Register.SwizzleY;
1486 unsigned swizzleZ = reg->Register.SwizzleZ;
1487 unsigned swizzleW = reg->Register.SwizzleW;
1488 const boolean absolute = reg->Register.Absolute;
1489 const boolean negate = reg->Register.Negate;
1490 VGPU10OperandToken0 operand0;
1491 VGPU10OperandToken1 operand1;
1492
1493 operand0.value = operand1.value = 0;
1494
1495 if (emit->unit == PIPE_SHADER_FRAGMENT){
1496 if (file == TGSI_FILE_INPUT) {
1497 if (index == emit->fs.face_input_index) {
1498 /* Replace INPUT[FACE] with TEMP[FACE] */
1499 file = TGSI_FILE_TEMPORARY;
1500 index = emit->fs.face_tmp_index;
1501 }
1502 else if (index == emit->fs.fragcoord_input_index) {
1503 /* Replace INPUT[POSITION] with TEMP[POSITION] */
1504 file = TGSI_FILE_TEMPORARY;
1505 index = emit->fs.fragcoord_tmp_index;
1506 }
1507 else if (index == emit->fs.layer_input_index) {
1508 /* Replace INPUT[LAYER] with zero.x */
1509 file = TGSI_FILE_IMMEDIATE;
1510 index = emit->fs.layer_imm_index;
1511 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1512 }
1513 else {
1514 /* We remap fragment shader inputs so that FS input indexes
1515 * match up with VS/GS output indexes.
1516 */
1517 index = emit->linkage.input_map[index];
1518 }
1519 }
1520 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1521 if (index == emit->fs.sample_pos_sys_index) {
1522 assert(emit->version >= 41);
1523 /* Current sample position is in a temp register */
1524 file = TGSI_FILE_TEMPORARY;
1525 index = emit->fs.sample_pos_tmp_index;
1526 }
1527 else if (index == emit->fs.sample_mask_in_sys_index) {
1528 /* Emitted as vCoverage0.x */
1529 /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1530 * elements where s is the maximum number of color samples supported
1531 * by the implementation. With current implementation, we should not
1532 * have more than one element. So assert if Index != 0
1533 */
1534 assert((!reg->Register.Indirect && reg->Register.Index == 0) ||
1535 reg->Register.Indirect);
1536 operand0.value = 0;
1537 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1538 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1539 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1540 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1541 emit_dword(emit, operand0.value);
1542 return;
1543 }
1544 else {
1545 /* Map the TGSI system value to a VGPU10 input register */
1546 assert(index < ARRAY_SIZE(emit->system_value_indexes));
1547 file = TGSI_FILE_INPUT;
1548 index = emit->system_value_indexes[index];
1549 }
1550 }
1551 }
1552 else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1553 if (file == TGSI_FILE_INPUT) {
1554 if (index == emit->gs.prim_id_index) {
1555 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1556 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1557 }
1558 index = emit->linkage.input_map[index];
1559 }
1560 else if (file == TGSI_FILE_SYSTEM_VALUE &&
1561 index == emit->gs.invocation_id_sys_index) {
1562 /* Emitted as vGSInstanceID0.x */
1563 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1564 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1565 index = 0;
1566 }
1567 }
1568 else if (emit->unit == PIPE_SHADER_VERTEX) {
1569 if (file == TGSI_FILE_INPUT) {
1570 /* if input is adjusted... */
1571 if ((emit->key.vs.adjust_attrib_w_1 |
1572 emit->key.vs.adjust_attrib_itof |
1573 emit->key.vs.adjust_attrib_utof |
1574 emit->key.vs.attrib_is_bgra |
1575 emit->key.vs.attrib_puint_to_snorm |
1576 emit->key.vs.attrib_puint_to_uscaled |
1577 emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1578 file = TGSI_FILE_TEMPORARY;
1579 index = emit->vs.adjusted_input[index];
1580 }
1581 }
1582 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1583 if (index == emit->vs.vertex_id_sys_index &&
1584 emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
1585 file = TGSI_FILE_TEMPORARY;
1586 index = emit->vs.vertex_id_tmp_index;
1587 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1588 }
1589 else {
1590 /* Map the TGSI system value to a VGPU10 input register */
1591 assert(index < ARRAY_SIZE(emit->system_value_indexes));
1592 file = TGSI_FILE_INPUT;
1593 index = emit->system_value_indexes[index];
1594 }
1595 }
1596 }
1597 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1598
1599 if (file == TGSI_FILE_SYSTEM_VALUE) {
1600 if (index == emit->tcs.vertices_per_patch_index) {
1601 /**
1602 * if source register is the system value for vertices_per_patch,
1603 * replace it with the immediate.
1604 */
1605 file = TGSI_FILE_IMMEDIATE;
1606 index = emit->tcs.imm_index;
1607 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1608 }
1609 else if (index == emit->tcs.invocation_id_sys_index) {
1610 if (emit->tcs.control_point_phase) {
1611 /**
1612 * Emitted as vOutputControlPointID.x
1613 */
1614 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1615 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1616 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1617 operand0.mask = 0;
1618 emit_dword(emit, operand0.value);
1619 return;
1620 }
1621 else {
1622 /* There is no control point ID input declaration in
1623 * the patch constant phase in the hull shader.
1624 * Since for now we are emitting all instructions in
1625 * the patch constant phase, we are replacing the
1626 * control point ID reference with the immediate 0.
1627 */
1628 file = TGSI_FILE_IMMEDIATE;
1629 index = emit->tcs.imm_index;
1630 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1631 }
1632 }
1633 else if (index == emit->tcs.prim_id_index) {
1634 /**
1635 * Emitted as vPrim.x
1636 */
1637 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1638 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1639 index = 0;
1640 }
1641 }
1642 else if (file == TGSI_FILE_INPUT) {
1643 index = emit->linkage.input_map[index];
1644 if (!emit->tcs.control_point_phase) {
1645 /* Emitted as vicp */
1646 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1647 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1648 assert(reg->Register.Dimension);
1649 }
1650 }
1651 else if (file == TGSI_FILE_OUTPUT) {
1652 if ((index >= emit->tcs.patch_generic_out_index &&
1653 index < (emit->tcs.patch_generic_out_index +
1654 emit->tcs.patch_generic_out_count)) ||
1655 index == emit->tcs.inner.tgsi_index ||
1656 index == emit->tcs.outer.tgsi_index) {
1657 if (emit->tcs.control_point_phase) {
1658 emit->discard_instruction = TRUE;
1659 }
1660 else {
1661 /* The device doesn't allow reading from an output register, so
1662 * use the corresponding temporary register as the source */
1663 file = TGSI_FILE_TEMPORARY;
1664 if (index == emit->tcs.inner.tgsi_index) {
1665 index = emit->tcs.inner.temp_index;
1666 }
1667 else if (index == emit->tcs.outer.tgsi_index) {
1668 index = emit->tcs.outer.temp_index;
1669 }
1670 else {
1671 index = emit->tcs.patch_generic_tmp_index +
1672 (index - emit->tcs.patch_generic_out_index);
1673 }
1674
1675 /**
1676 * Temporaries for patch constant data can be done
1677 * as indexable temporaries.
1678 */
1679 tempArrayId = get_temp_array_id(emit, file, index);
1680 index2d = tempArrayId > 0;
1681 index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1682 }
1683 }
1684 else if (index2d) {
1685 if (emit->tcs.control_point_phase) {
1686 /* The device doesn't allow reading from an output register, so
1687 * use the corresponding temporary register as the source */
1688 file = TGSI_FILE_TEMPORARY;
1689 index2d = FALSE;
1690 index = emit->tcs.control_point_tmp_index +
1691 (index - emit->tcs.control_point_out_index);
1692 }
1693 else {
1694 emit->discard_instruction = TRUE;
1695 }
1696 }
1697 }
1698 }
1699 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1700 if (file == TGSI_FILE_SYSTEM_VALUE) {
1701 if (index == emit->tes.tesscoord_sys_index) {
1702 /**
1703 * Emitted as vDomain
1704 */
1705 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1706 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1707 index = 0;
1708 }
1709 else if (index == emit->tes.inner.tgsi_index) {
1710 file = TGSI_FILE_TEMPORARY;
1711 index = emit->tes.inner.temp_index;
1712 }
1713 else if (index == emit->tes.outer.tgsi_index) {
1714 file = TGSI_FILE_TEMPORARY;
1715 index = emit->tes.outer.temp_index;
1716 }
1717 else if (index == emit->tes.prim_id_index) {
1718 /**
1719 * Emitted as vPrim.x
1720 */
1721 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1722 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1723 index = 0;
1724 }
1725
1726 }
1727 else if (file == TGSI_FILE_INPUT) {
1728 if (index2d) {
1729 /* 2D input is emitted as vcp (input control point). */
1730 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1731 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1732
1733 /* index specifies the element index and is remapped
1734 * to align with the tcs output index.
1735 */
1736 index = emit->linkage.input_map[index];
1737
1738 assert(index2 < emit->key.tes.vertices_per_patch);
1739 }
1740 else {
1741 if (index < emit->key.tes.tessfactor_index)
1742 /* index specifies the generic patch index.
1743 * Remapped to match up with the tcs output index.
1744 */
1745 index = emit->linkage.input_map[index];
1746
1747 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1748 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1749 }
1750 }
1751 }
1752
1753 if (file == TGSI_FILE_ADDRESS) {
1754 index = emit->address_reg_index[index];
1755 file = TGSI_FILE_TEMPORARY;
1756 }
1757
1758 if (file == TGSI_FILE_TEMPORARY) {
1759 if (need_temp_reg_initialization(emit, index)) {
1760 emit->initialize_temp_index = index;
1761 emit->discard_instruction = TRUE;
1762 }
1763 }
1764
1765 if (operand0.value == 0) {
1766 /* if operand0 was not set above for a special case, do the general
1767 * case now.
1768 */
1769 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1770 operand0.operandType = translate_register_file(file, tempArrayId > 0);
1771 }
1772 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1773 index2d, indirect2d);
1774
1775 if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1776 operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1777 /* there's no swizzle for in-line immediates */
1778 if (swizzleX == swizzleY &&
1779 swizzleX == swizzleZ &&
1780 swizzleX == swizzleW) {
1781 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1782 }
1783 else {
1784 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1785 }
1786
1787 operand0.swizzleX = swizzleX;
1788 operand0.swizzleY = swizzleY;
1789 operand0.swizzleZ = swizzleZ;
1790 operand0.swizzleW = swizzleW;
1791
1792 if (absolute || negate) {
1793 operand0.extended = 1;
1794 operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1795 if (absolute && !negate)
1796 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1797 if (!absolute && negate)
1798 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1799 if (absolute && negate)
1800 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1801 }
1802 }
1803
1804 /* Emit the operand tokens */
1805 emit_dword(emit, operand0.value);
1806 if (operand0.extended)
1807 emit_dword(emit, operand1.value);
1808
1809 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1810 /* Emit the four float/int in-line immediate values */
1811 unsigned *c;
1812 assert(index < ARRAY_SIZE(emit->immediates));
1813 assert(file == TGSI_FILE_IMMEDIATE);
1814 assert(swizzleX < 4);
1815 assert(swizzleY < 4);
1816 assert(swizzleZ < 4);
1817 assert(swizzleW < 4);
1818 c = (unsigned *) emit->immediates[index];
1819 emit_dword(emit, c[swizzleX]);
1820 emit_dword(emit, c[swizzleY]);
1821 emit_dword(emit, c[swizzleZ]);
1822 emit_dword(emit, c[swizzleW]);
1823 }
1824 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1825 /* Emit the register index(es) */
1826 if (index2d) {
1827 emit_dword(emit, index2);
1828
1829 if (indirect2d) {
1830 emit_indirect_register(emit, reg->DimIndirect.Index);
1831 }
1832 }
1833
1834 emit_dword(emit, remap_temp_index(emit, file, index));
1835
1836 if (indirect) {
1837 emit_indirect_register(emit, reg->Indirect.Index);
1838 }
1839 }
1840 }
1841
1842
1843 /**
1844 * Emit a resource operand (for use with a SAMPLE instruction).
1845 */
1846 static void
1847 emit_resource_register(struct svga_shader_emitter_v10 *emit,
1848 unsigned resource_number)
1849 {
1850 VGPU10OperandToken0 operand0;
1851
1852 check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
1853
1854 /* init */
1855 operand0.value = 0;
1856
1857 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
1858 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1859 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1860 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1861 operand0.swizzleX = VGPU10_COMPONENT_X;
1862 operand0.swizzleY = VGPU10_COMPONENT_Y;
1863 operand0.swizzleZ = VGPU10_COMPONENT_Z;
1864 operand0.swizzleW = VGPU10_COMPONENT_W;
1865
1866 emit_dword(emit, operand0.value);
1867 emit_dword(emit, resource_number);
1868 }
1869
1870
1871 /**
1872 * Emit a sampler operand (for use with a SAMPLE instruction).
1873 */
1874 static void
1875 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
1876 unsigned sampler_number)
1877 {
1878 VGPU10OperandToken0 operand0;
1879
1880 check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
1881
1882 /* init */
1883 operand0.value = 0;
1884
1885 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
1886 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1887
1888 emit_dword(emit, operand0.value);
1889 emit_dword(emit, sampler_number);
1890 }
1891
1892
1893 /**
1894 * Emit an operand which reads the IS_FRONT_FACING register.
1895 */
1896 static void
1897 emit_face_register(struct svga_shader_emitter_v10 *emit)
1898 {
1899 VGPU10OperandToken0 operand0;
1900 unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
1901
1902 /* init */
1903 operand0.value = 0;
1904
1905 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
1906 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1907 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1908 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1909
1910 operand0.swizzleX = VGPU10_COMPONENT_X;
1911 operand0.swizzleY = VGPU10_COMPONENT_X;
1912 operand0.swizzleZ = VGPU10_COMPONENT_X;
1913 operand0.swizzleW = VGPU10_COMPONENT_X;
1914
1915 emit_dword(emit, operand0.value);
1916 emit_dword(emit, index);
1917 }
1918
1919
1920 /**
1921 * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
1922 * instruction.
1923 */
1924 static void
1925 emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
1926 {
1927 VGPU10OperandToken0 operand0;
1928
1929 /* init */
1930 operand0.value = 0;
1931
1932 /* No register index for rasterizer index (there's only one) */
1933 operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
1934 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1935 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1936 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1937 operand0.swizzleX = VGPU10_COMPONENT_X;
1938 operand0.swizzleY = VGPU10_COMPONENT_Y;
1939 operand0.swizzleZ = VGPU10_COMPONENT_Z;
1940 operand0.swizzleW = VGPU10_COMPONENT_W;
1941
1942 emit_dword(emit, operand0.value);
1943 }
1944
1945
1946 /**
1947 * Emit tokens for the "stream" register used by the
1948 * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
1949 */
1950 static void
1951 emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
1952 {
1953 VGPU10OperandToken0 operand0;
1954
1955 /* init */
1956 operand0.value = 0;
1957
1958 /* The stream register takes a 1D index (the stream number) */
1959 operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
1960 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1961 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1962
1963 emit_dword(emit, operand0.value);
1964 emit_dword(emit, index);
1965 }
1966
1967
1968 /**
1969 * Emit the token for a VGPU10 opcode, with precise parameter.
1970 * \param saturate clamp result to [0,1]?
1971 */
1972 static void
1973 emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
1974 unsigned vgpu10_opcode, boolean saturate, boolean precise)
1975 {
1976 VGPU10OpcodeToken0 token0;
1977
1978 token0.value = 0; /* init all fields to zero */
1979 token0.opcodeType = vgpu10_opcode;
1980 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1981 token0.saturate = saturate;
1982
1983 /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
1984 * 'invariant' declarations. Only set preciseValues=1 if we have SM5.
1985 */
1986 token0.preciseValues = precise && emit->version >= 50;
1987
1988 emit_dword(emit, token0.value);
1989
1990 emit->uses_precise_qualifier |= token0.preciseValues;
1991 }
1992
1993
1994 /**
1995 * Emit the token for a VGPU10 opcode.
1996 * \param saturate clamp result to [0,1]?
1997 */
1998 static void
1999 emit_opcode(struct svga_shader_emitter_v10 *emit,
2000 unsigned vgpu10_opcode, boolean saturate)
2001 {
2002 emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE);
2003 }
2004
2005
2006 /**
2007 * Emit the token for a VGPU10 resinfo instruction.
2008 * \param modifier return type modifier, _uint or _rcpFloat.
2009 * TODO: We may want to remove this parameter if it will
2010 * only ever be used as _uint.
2011 */
2012 static void
2013 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
2014 VGPU10_RESINFO_RETURN_TYPE modifier)
2015 {
2016 VGPU10OpcodeToken0 token0;
2017
2018 token0.value = 0; /* init all fields to zero */
2019 token0.opcodeType = VGPU10_OPCODE_RESINFO;
2020 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2021 token0.resinfoReturnType = modifier;
2022
2023 emit_dword(emit, token0.value);
2024 }
2025
2026
2027 /**
2028 * Emit opcode tokens for a texture sample instruction. Texture instructions
2029 * can be rather complicated (texel offsets, etc) so we have this specialized
2030 * function.
2031 */
2032 static void
2033 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2034 unsigned vgpu10_opcode, boolean saturate,
2035 const int offsets[3])
2036 {
2037 VGPU10OpcodeToken0 token0;
2038 VGPU10OpcodeToken1 token1;
2039
2040 token0.value = 0; /* init all fields to zero */
2041 token0.opcodeType = vgpu10_opcode;
2042 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2043 token0.saturate = saturate;
2044
2045 if (offsets[0] || offsets[1] || offsets[2]) {
2046 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2047 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2048 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2049 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2050 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2051 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2052
2053 token0.extended = 1;
2054 token1.value = 0;
2055 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2056 token1.offsetU = offsets[0];
2057 token1.offsetV = offsets[1];
2058 token1.offsetW = offsets[2];
2059 }
2060
2061 emit_dword(emit, token0.value);
2062 if (token0.extended) {
2063 emit_dword(emit, token1.value);
2064 }
2065 }
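
/* Usage sketch (illustrative, not driver code): a sample instruction with
 * texel offsets, e.g. offsets[] = {1, -1, 0}, is emitted with an extended
 * opcode: token0.extended = 1 and token1.offsetU/V/W = 1/-1/0.  The offsets
 * must lie in the D3D10-style immediate-offset range bounded by
 * VGPU10_MIN/MAX_TEXEL_FETCH_OFFSET (typically -8..7), which the asserts
 * above check.
 */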
2066
2067
2068 /**
2069 * Emit a DISCARD opcode token.
2070 * If nonzero is set, we'll discard the fragment if the X component is not 0.
2071 * Otherwise, we'll discard the fragment if the X component is 0.
2072 */
2073 static void
2074 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
2075 {
2076 VGPU10OpcodeToken0 opcode0;
2077
2078 opcode0.value = 0;
2079 opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2080 if (nonzero)
2081 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2082
2083 emit_dword(emit, opcode0.value);
2084 }
2085
2086
2087 /**
2088 * We need to call this before we begin emitting a VGPU10 instruction.
2089 */
2090 static void
2091 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
2092 {
2093 assert(emit->inst_start_token == 0);
2094 /* Save location of the instruction's VGPU10OpcodeToken0 token.
2095 * Note, we can't save a pointer because it would become invalid if
2096 * we have to realloc the output buffer.
2097 */
2098 emit->inst_start_token = emit_get_num_tokens(emit);
2099 }
2100
2101
2102 /**
2103 * We need to call this after we emit the last token of a VGPU10 instruction.
2104 * This function patches in the opcode token's instructionLength field.
2105 */
2106 static void
2107 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
2108 {
2109 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
2110 unsigned inst_length;
2111
2112 assert(emit->inst_start_token > 0);
2113
2114 if (emit->discard_instruction) {
2115 /* Back up the emit->ptr to where this instruction started so
2116 * that we discard the current instruction.
2117 */
2118 emit->ptr = (char *) (tokens + emit->inst_start_token);
2119 }
2120 else {
2121 /* Compute instruction length and patch that into the start of
2122 * the instruction.
2123 */
2124 inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
2125
2126 assert(inst_length > 0);
2127
2128 tokens[emit->inst_start_token].instructionLength = inst_length;
2129 }
2130
2131 emit->inst_start_token = 0; /* reset to zero for error checking */
2132 emit->discard_instruction = FALSE;
2133 }
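
/* Usage sketch (illustrative only): every VGPU10 instruction is emitted
 * between a begin/end pair so the length can be patched afterwards, e.g.
 *
 *    begin_emit_instruction(emit);
 *    emit_opcode(emit, VGPU10_OPCODE_RET, FALSE);
 *    end_emit_instruction(emit);
 *
 * end_emit_instruction() counts the tokens emitted since the matching
 * begin_emit_instruction() call and writes that count into the opcode
 * token's instructionLength field, or rewinds the buffer entirely if
 * discard_instruction was set.
 */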
2134
2135
2136 /**
2137 * Return index for a free temporary register.
2138 */
2139 static unsigned
2140 get_temp_index(struct svga_shader_emitter_v10 *emit)
2141 {
2142 assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
2143 return emit->num_shader_temps + emit->internal_temp_count++;
2144 }
2145
2146
2147 /**
2148 * Release the temporaries which were generated by get_temp_index().
2149 */
2150 static void
2151 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
2152 {
2153 emit->internal_temp_count = 0;
2154 }
2155
2156
2157 /**
2158 * Create a tgsi_full_src_register.
2159 */
2160 static struct tgsi_full_src_register
2161 make_src_reg(enum tgsi_file_type file, unsigned index)
2162 {
2163 struct tgsi_full_src_register reg;
2164
2165 memset(&reg, 0, sizeof(reg));
2166 reg.Register.File = file;
2167 reg.Register.Index = index;
2168 reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2169 reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2170 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2171 reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2172 return reg;
2173 }
2174
2175
2176 /**
2177 * Create a tgsi_full_src_register with a swizzle such that all four
2178 * vector components have the same scalar value.
2179 */
2180 static struct tgsi_full_src_register
2181 make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2182 {
2183 struct tgsi_full_src_register reg;
2184
2185 assert(component >= TGSI_SWIZZLE_X);
2186 assert(component <= TGSI_SWIZZLE_W);
2187
2188 memset(&reg, 0, sizeof(reg));
2189 reg.Register.File = file;
2190 reg.Register.Index = index;
2191 reg.Register.SwizzleX =
2192 reg.Register.SwizzleY =
2193 reg.Register.SwizzleZ =
2194 reg.Register.SwizzleW = component;
2195 return reg;
2196 }
2197
2198
2199 /**
2200 * Create a tgsi_full_src_register for a temporary.
2201 */
2202 static struct tgsi_full_src_register
2203 make_src_temp_reg(unsigned index)
2204 {
2205 return make_src_reg(TGSI_FILE_TEMPORARY, index);
2206 }
2207
2208
2209 /**
2210 * Create a tgsi_full_src_register for a constant.
2211 */
2212 static struct tgsi_full_src_register
2213 make_src_const_reg(unsigned index)
2214 {
2215 return make_src_reg(TGSI_FILE_CONSTANT, index);
2216 }
2217
2218
2219 /**
2220 * Create a tgsi_full_src_register for an immediate constant.
2221 */
2222 static struct tgsi_full_src_register
2223 make_src_immediate_reg(unsigned index)
2224 {
2225 return make_src_reg(TGSI_FILE_IMMEDIATE, index);
2226 }
2227
2228
2229 /**
2230 * Create a tgsi_full_dst_register.
2231 */
2232 static struct tgsi_full_dst_register
2233 make_dst_reg(enum tgsi_file_type file, unsigned index)
2234 {
2235 struct tgsi_full_dst_register reg;
2236
2237 memset(&reg, 0, sizeof(reg));
2238 reg.Register.File = file;
2239 reg.Register.Index = index;
2240 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
2241 return reg;
2242 }
2243
2244
2245 /**
2246 * Create a tgsi_full_dst_register for a temporary.
2247 */
2248 static struct tgsi_full_dst_register
2249 make_dst_temp_reg(unsigned index)
2250 {
2251 return make_dst_reg(TGSI_FILE_TEMPORARY, index);
2252 }
2253
2254
2255 /**
2256 * Create a tgsi_full_dst_register for an output.
2257 */
2258 static struct tgsi_full_dst_register
2259 make_dst_output_reg(unsigned index)
2260 {
2261 return make_dst_reg(TGSI_FILE_OUTPUT, index);
2262 }
2263
2264
2265 /**
2266 * Create negated tgsi_full_src_register.
2267 */
2268 static struct tgsi_full_src_register
2269 negate_src(const struct tgsi_full_src_register *reg)
2270 {
2271 struct tgsi_full_src_register neg = *reg;
2272 neg.Register.Negate = !reg->Register.Negate;
2273 return neg;
2274 }
2275
2276 /**
2277 * Create absolute value of a tgsi_full_src_register.
2278 */
2279 static struct tgsi_full_src_register
2280 absolute_src(const struct tgsi_full_src_register *reg)
2281 {
2282 struct tgsi_full_src_register absolute = *reg;
2283 absolute.Register.Absolute = 1;
2284 return absolute;
2285 }
2286
2287
2288 /** Return the named swizzle term from the src register */
2289 static inline unsigned
2290 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2291 {
2292 switch (term) {
2293 case TGSI_SWIZZLE_X:
2294 return reg->Register.SwizzleX;
2295 case TGSI_SWIZZLE_Y:
2296 return reg->Register.SwizzleY;
2297 case TGSI_SWIZZLE_Z:
2298 return reg->Register.SwizzleZ;
2299 case TGSI_SWIZZLE_W:
2300 return reg->Register.SwizzleW;
2301 default:
2302 assert(!"Bad swizzle");
2303 return TGSI_SWIZZLE_X;
2304 }
2305 }
2306
2307
2308 /**
2309 * Create swizzled tgsi_full_src_register.
2310 */
2311 static struct tgsi_full_src_register
2312 swizzle_src(const struct tgsi_full_src_register *reg,
2313 enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2314 enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2315 {
2316 struct tgsi_full_src_register swizzled = *reg;
2317 /* Note: we swizzle the current swizzle */
2318 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2319 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2320 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2321 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2322 return swizzled;
2323 }
2324
2325
2326 /**
2327 * Create swizzled tgsi_full_src_register where all the swizzle
2328 * terms are the same.
2329 */
2330 static struct tgsi_full_src_register
2331 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2332 {
2333 struct tgsi_full_src_register swizzled = *reg;
2334 /* Note: we swizzle the current swizzle */
2335 swizzled.Register.SwizzleX =
2336 swizzled.Register.SwizzleY =
2337 swizzled.Register.SwizzleZ =
2338 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2339 return swizzled;
2340 }
2341
2342
2343 /**
2344 * Create new tgsi_full_dst_register with writemask.
2345 * \param mask bitmask of TGSI_WRITEMASK_[XYZW]
2346 */
2347 static struct tgsi_full_dst_register
2348 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2349 {
2350 struct tgsi_full_dst_register masked = *reg;
2351 masked.Register.WriteMask = mask;
2352 return masked;
2353 }
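
/* Usage sketch (illustrative; the register indices are hypothetical): the
 * helpers above compose when building operands.  For example, to read
 * TEMP[7].wwww and write only TEMP[5].x:
 *
 *    struct tgsi_full_src_register t7 = make_src_temp_reg(7);
 *    struct tgsi_full_src_register src = scalar_src(&t7, TGSI_SWIZZLE_W);
 *    struct tgsi_full_dst_register t5 = make_dst_temp_reg(5);
 *    struct tgsi_full_dst_register dst = writemask_dst(&t5, TGSI_WRITEMASK_X);
 */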
2354
2355
2356 /**
2357 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2358 */
2359 static boolean
2360 same_swizzle_terms(const struct tgsi_full_src_register *reg)
2361 {
2362 return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2363 reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2364 reg->Register.SwizzleZ == reg->Register.SwizzleW);
2365 }
2366
2367
2368 /**
2369 * Search the vector for the value 'x' and return its position.
2370 */
2371 static int
2372 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2373 union tgsi_immediate_data x)
2374 {
2375 unsigned i;
2376 for (i = 0; i < 4; i++) {
2377 if (vec[i].Int == x.Int)
2378 return i;
2379 }
2380 return -1;
2381 }
2382
2383
2384 /**
2385 * Helper used by make_immediate_reg(), make_immediate_reg_4().
2386 */
2387 static int
2388 find_immediate(struct svga_shader_emitter_v10 *emit,
2389 union tgsi_immediate_data x, unsigned startIndex)
2390 {
2391 const unsigned endIndex = emit->num_immediates;
2392 unsigned i;
2393
2394 assert(emit->immediates_emitted);
2395
2396 /* Search the components of each immediate vector for the value x */
2397 for (i = startIndex; i < endIndex; i++) {
2398 if (x.Int == emit->immediates[i][0].Int ||
2399 x.Int == emit->immediates[i][1].Int ||
2400 x.Int == emit->immediates[i][2].Int ||
2401 x.Int == emit->immediates[i][3].Int) {
2402 return i;
2403 }
2404 }
2405 /* Should never try to use an immediate value that wasn't pre-declared */
2406 assert(!"find_immediate() failed!");
2407 return -1;
2408 }
2409
2410
2411 /**
2412 * As above, but search for a double[2] pair.
2413 */
2414 static int
2415 find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
2416 double x, double y)
2417 {
2418 const unsigned endIndex = emit->num_immediates;
2419 unsigned i;
2420
2421 assert(emit->immediates_emitted);
2422
2423 /* Search the double immediates for the (x, y) pair */
2424 for (i = 0; i < endIndex; i++) {
2425 if (x == emit->immediates_dbl[i][0] &&
2426 y == emit->immediates_dbl[i][1]) {
2427 return i;
2428 }
2429 }
2430 /* Should never try to use an immediate value that wasn't pre-declared */
2431 assert(!"find_immediate_dbl() failed!");
2432 return -1;
2433 }
2434
2435
2436
2437 /**
2438 * Return a tgsi_full_src_register for an immediate/literal
2439 * union tgsi_immediate_data[4] value.
2440 * Note: the values must have been previously declared/allocated in
2441 * emit_pre_helpers(). And, all of x,y,z,w must be located in the same
2442 * vec4 immediate.
2443 */
2444 static struct tgsi_full_src_register
2445 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
2446 const union tgsi_immediate_data imm[4])
2447 {
2448 struct tgsi_full_src_register reg;
2449 unsigned i;
2450
2451 for (i = 0; i < emit->num_common_immediates; i++) {
2452 /* search for first component value */
2453 int immpos = find_immediate(emit, imm[0], i);
2454 int x, y, z, w;
2455
2456 assert(immpos >= 0);
2457
2458 /* find remaining components within the immediate vector */
2459 x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
2460 y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
2461 z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
2462 w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
2463
2464 if (x >= 0 && y >= 0 && z >= 0 && w >= 0) {
2465 /* found them all */
2466 memset(&reg, 0, sizeof(reg));
2467 reg.Register.File = TGSI_FILE_IMMEDIATE;
2468 reg.Register.Index = immpos;
2469 reg.Register.SwizzleX = x;
2470 reg.Register.SwizzleY = y;
2471 reg.Register.SwizzleZ = z;
2472 reg.Register.SwizzleW = w;
2473 return reg;
2474 }
2475 /* else, keep searching */
2476 }
2477
2478 assert(!"Failed to find immediate register!");
2479
2480 /* Just return IMM[0].xxxx */
2481 memset(&reg, 0, sizeof(reg));
2482 reg.Register.File = TGSI_FILE_IMMEDIATE;
2483 return reg;
2484 }
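
/* Usage sketch (illustrative of the intended two-step pattern): literals
 * must be allocated before the immediate constant buffer is emitted and
 * can only be looked up afterwards, e.g.
 *
 *    // during the pre-helper/declaration pass:
 *    alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
 *    ...
 *    // while translating instructions:
 *    struct tgsi_full_src_register half =
 *       make_immediate_reg_float(emit, 0.5f);
 *
 * The make_immediate_reg*() helpers assert if the requested value was
 * never pre-allocated.
 */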
2485
2486
2487 /**
2488 * Return a tgsi_full_src_register for an immediate/literal
2489 * union tgsi_immediate_data value of the form {value, value, value, value}.
2490 * \sa make_immediate_reg_4() regarding allowed values.
2491 */
2492 static struct tgsi_full_src_register
2493 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2494 union tgsi_immediate_data value)
2495 {
2496 struct tgsi_full_src_register reg;
2497 int immpos = find_immediate(emit, value, 0);
2498
2499 assert(immpos >= 0);
2500
2501 memset(&reg, 0, sizeof(reg));
2502 reg.Register.File = TGSI_FILE_IMMEDIATE;
2503 reg.Register.Index = immpos;
2504 reg.Register.SwizzleX =
2505 reg.Register.SwizzleY =
2506 reg.Register.SwizzleZ =
2507 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2508
2509 return reg;
2510 }
2511
2512
2513 /**
2514 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2515 * \sa make_immediate_reg_4() regarding allowed values.
2516 */
2517 static struct tgsi_full_src_register
2518 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2519 float x, float y, float z, float w)
2520 {
2521 union tgsi_immediate_data imm[4];
2522 imm[0].Float = x;
2523 imm[1].Float = y;
2524 imm[2].Float = z;
2525 imm[3].Float = w;
2526 return make_immediate_reg_4(emit, imm);
2527 }
2528
2529
2530 /**
2531 * Return a tgsi_full_src_register for an immediate/literal float value
2532 * of the form {value, value, value, value}.
2533 * \sa make_immediate_reg_4() regarding allowed values.
2534 */
2535 static struct tgsi_full_src_register
2536 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
2537 {
2538 union tgsi_immediate_data imm;
2539 imm.Float = value;
2540 return make_immediate_reg(emit, imm);
2541 }
2542
2543
2544 /**
2545 * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2546 */
2547 static struct tgsi_full_src_register
2548 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2549 int x, int y, int z, int w)
2550 {
2551 union tgsi_immediate_data imm[4];
2552 imm[0].Int = x;
2553 imm[1].Int = y;
2554 imm[2].Int = z;
2555 imm[3].Int = w;
2556 return make_immediate_reg_4(emit, imm);
2557 }
2558
2559
2560 /**
2561 * Return a tgsi_full_src_register for an immediate/literal int value
2562 * of the form {value, value, value, value}.
2563 * \sa make_immediate_reg_4() regarding allowed values.
2564 */
2565 static struct tgsi_full_src_register
2566 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
2567 {
2568 union tgsi_immediate_data imm;
2569 imm.Int = value;
2570 return make_immediate_reg(emit, imm);
2571 }
2572
2573
2574 static struct tgsi_full_src_register
2575 make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2576 {
2577 struct tgsi_full_src_register reg;
2578 int immpos = find_immediate_dbl(emit, value, value);
2579
2580 assert(immpos >= 0);
2581
2582 memset(&reg, 0, sizeof(reg));
2583 reg.Register.File = TGSI_FILE_IMMEDIATE;
2584 reg.Register.Index = immpos;
2585 reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2586 reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2587 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2588 reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2589
2590 return reg;
2591 }
2592
2593
2594 /**
2595 * Allocate space for a union tgsi_immediate_data[4] immediate.
2596 * \return the index/position of the immediate.
2597 */
2598 static unsigned
2599 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2600 const union tgsi_immediate_data imm[4])
2601 {
2602 unsigned n = emit->num_immediates++;
2603 assert(!emit->immediates_emitted);
2604 assert(n < ARRAY_SIZE(emit->immediates));
2605 emit->immediates[n][0] = imm[0];
2606 emit->immediates[n][1] = imm[1];
2607 emit->immediates[n][2] = imm[2];
2608 emit->immediates[n][3] = imm[3];
2609 return n;
2610 }
2611
2612
2613 /**
2614 * Allocate space for a float[4] immediate.
2615 * \return the index/position of the immediate.
2616 */
2617 static unsigned
2618 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2619 float x, float y, float z, float w)
2620 {
2621 union tgsi_immediate_data imm[4];
2622 imm[0].Float = x;
2623 imm[1].Float = y;
2624 imm[2].Float = z;
2625 imm[3].Float = w;
2626 return alloc_immediate_4(emit, imm);
2627 }
2628
2629
2630 /**
2631 * Allocate space for an int[4] immediate.
2632 * \return the index/position of the immediate.
2633 */
2634 static unsigned
2635 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2636 int x, int y, int z, int w)
2637 {
2638 union tgsi_immediate_data imm[4];
2639 imm[0].Int = x;
2640 imm[1].Int = y;
2641 imm[2].Int = z;
2642 imm[3].Int = w;
2643 return alloc_immediate_4(emit, imm);
2644 }
2645
2646
2647 static unsigned
2648 alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2649 double x, double y)
2650 {
2651 unsigned n = emit->num_immediates++;
2652 assert(!emit->immediates_emitted);
2653 assert(n < ARRAY_SIZE(emit->immediates));
2654 emit->immediates_dbl[n][0] = x;
2655 emit->immediates_dbl[n][1] = y;
2656 return n;
2657
2658 }
2659
2660
2661 /**
2662 * Allocate a shader input to store a system value.
2663 */
2664 static unsigned
2665 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2666 {
2667 const unsigned n = emit->linkage.input_map_max + 1 + index;
2668 assert(index < ARRAY_SIZE(emit->system_value_indexes));
2669 emit->system_value_indexes[index] = n;
2670 return n;
2671 }
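
/* Worked example (illustrative): system values are given input registers
 * just past the highest linked input.  With linkage.input_map_max == 3,
 * system value 0 maps to input register 3 + 1 + 0 = 4, and system value 1
 * to register 5.
 */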
2672
2673
2674 /**
2675 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2676 */
2677 static boolean
2678 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2679 const struct tgsi_full_immediate *imm)
2680 {
2681 /* We don't actually emit any code here. We just save the
2682 * immediate values and emit them later.
2683 */
2684 alloc_immediate_4(emit, imm->u);
2685 return TRUE;
2686 }
2687
2688
2689 /**
2690 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2691 * containing all the immediate values previously allocated
2692 * with alloc_immediate_4().
2693 */
2694 static boolean
2695 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2696 {
2697 VGPU10OpcodeToken0 token;
2698
2699 assert(!emit->immediates_emitted);
2700
2701 token.value = 0;
2702 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2703 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2704
2705 /* Note: no begin/end_emit_instruction() calls */
2706 emit_dword(emit, token.value);
2707 emit_dword(emit, 2 + 4 * emit->num_immediates);
2708 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2709
2710 emit->immediates_emitted = TRUE;
2711
2712 return TRUE;
2713 }
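
/* Worked example (illustrative): the CUSTOMDATA block length counts itself.
 * It is 2 header DWORDs (the CUSTOMDATA opcode token and the length token)
 * plus 4 DWORDs per vec4 immediate, so a shader with three immediates
 * emits a length of 2 + 4 * 3 = 14 DWORDs.
 */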
2714
2715
2716 /**
2717 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2718 * interpolation mode.
2719 * \return a VGPU10_INTERPOLATION_x value
2720 */
2721 static unsigned
2722 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2723 enum tgsi_interpolate_mode interp,
2724 enum tgsi_interpolate_loc interpolate_loc)
2725 {
2726 if (interp == TGSI_INTERPOLATE_COLOR) {
2727 interp = emit->key.fs.flatshade ?
2728 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2729 }
2730
2731 switch (interp) {
2732 case TGSI_INTERPOLATE_CONSTANT:
2733 return VGPU10_INTERPOLATION_CONSTANT;
2734 case TGSI_INTERPOLATE_LINEAR:
2735 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2736 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2737 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2738 emit->version >= 41) {
2739 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2740 } else {
2741 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2742 }
2743 break;
2744 case TGSI_INTERPOLATE_PERSPECTIVE:
2745 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2746 return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2747 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2748 emit->version >= 41) {
2749 return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
2750 } else {
2751 return VGPU10_INTERPOLATION_LINEAR;
2752 }
2753 break;
2754 default:
2755 assert(!"Unexpected interpolation mode");
2756 return VGPU10_INTERPOLATION_CONSTANT;
2757 }
2758 }
2759
2760
2761 /**
2762 * Translate a TGSI property to VGPU10.
2763 * Don't emit any instructions yet; we only need to gather the primitive property
2764 * information. The output primitive topology might be changed later. The
2765 * final property instructions will be emitted as part of the pre-helper code.
2766 */
2767 static boolean
2768 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
2769 const struct tgsi_full_property *prop)
2770 {
2771 static const VGPU10_PRIMITIVE primType[] = {
2772 VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */
2773 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */
2774 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */
2775 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */
2776 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */
2777 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */
2778 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */
2779 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */
2780 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
2781 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */
2782 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
2783 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2784 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2785 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2786 };
2787
2788 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
2789 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */
2790 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */
2791 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */
2792 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */
2793 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */
2794 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
2795 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
2796 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */
2797 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
2798 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */
2799 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
2800 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2801 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2802 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2803 };
2804
2805 static const unsigned inputArraySize[] = {
2806 0, /* VGPU10_PRIMITIVE_UNDEFINED */
2807 1, /* VGPU10_PRIMITIVE_POINT */
2808 2, /* VGPU10_PRIMITIVE_LINE */
2809 3, /* VGPU10_PRIMITIVE_TRIANGLE */
2810 0,
2811 0,
2812 4, /* VGPU10_PRIMITIVE_LINE_ADJ */
2813 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
2814 };
2815
2816 switch (prop->Property.PropertyName) {
2817 case TGSI_PROPERTY_GS_INPUT_PRIM:
2818 assert(prop->u[0].Data < ARRAY_SIZE(primType));
2819 emit->gs.prim_type = primType[prop->u[0].Data];
2820 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
2821 emit->gs.input_size = inputArraySize[emit->gs.prim_type];
2822 break;
2823
2824 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2825 assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
2826 emit->gs.prim_topology = primTopology[prop->u[0].Data];
2827 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
2828 break;
2829
2830 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
2831 emit->gs.max_out_vertices = prop->u[0].Data;
2832 break;
2833
2834 case TGSI_PROPERTY_GS_INVOCATIONS:
2835 emit->gs.invocations = prop->u[0].Data;
2836 break;
2837
2838 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2839 case TGSI_PROPERTY_NEXT_SHADER:
2840 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
2841 /* no-op */
2842 break;
2843
2844 case TGSI_PROPERTY_TCS_VERTICES_OUT:
2845 /* This info is already captured in the shader key */
2846 break;
2847
2848 case TGSI_PROPERTY_TES_PRIM_MODE:
2849 emit->tes.prim_mode = prop->u[0].Data;
2850 break;
2851
2852 case TGSI_PROPERTY_TES_SPACING:
2853 emit->tes.spacing = prop->u[0].Data;
2854 break;
2855
2856 case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
2857 emit->tes.vertices_order_cw = prop->u[0].Data;
2858 break;
2859
2860 case TGSI_PROPERTY_TES_POINT_MODE:
2861 emit->tes.point_mode = prop->u[0].Data;
2862 break;
2863
2864 default:
2865 debug_printf("Unexpected TGSI property %s\n",
2866 tgsi_property_names[prop->Property.PropertyName]);
2867 }
2868
2869 return TRUE;
2870 }
2871
2872
2873 static void
2874 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
2875 VGPU10OpcodeToken0 opcode0, unsigned nData,
2876 unsigned data)
2877 {
2878 begin_emit_instruction(emit);
2879 emit_dword(emit, opcode0.value);
2880 if (nData)
2881 emit_dword(emit, data);
2882 end_emit_instruction(emit);
2883 }
2884
2885
2886 /**
2887 * Emit property instructions
2888 */
2889 static void
2890 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
2891 {
2892 VGPU10OpcodeToken0 opcode0;
2893
2894 assert(emit->unit == PIPE_SHADER_GEOMETRY);
2895
2896 /* emit input primitive type declaration */
2897 opcode0.value = 0;
2898 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
2899 opcode0.primitive = emit->gs.prim_type;
2900 emit_property_instruction(emit, opcode0, 0, 0);
2901
2902 /* emit max output vertices */
2903 opcode0.value = 0;
2904 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
2905 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
2906
2907 if (emit->version >= 50 && emit->gs.invocations > 0) {
2908 opcode0.value = 0;
2909 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
2910 emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
2911 }
2912 }
2913
2914
2915 /**
2916 * A helper function to declare tessellator domain in a hull shader or
2917 * in the domain shader.
2918 */
2919 static void
2920 emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
2921 enum pipe_prim_type prim_mode)
2922 {
2923 VGPU10OpcodeToken0 opcode0;
2924
2925 opcode0.value = 0;
2926 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
2927 switch (prim_mode) {
2928 case PIPE_PRIM_QUADS:
2929 case PIPE_PRIM_LINES:
2930 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
2931 break;
2932 case PIPE_PRIM_TRIANGLES:
2933 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
2934 break;
2935 default:
2936 debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
2937 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
2938 }
2939 begin_emit_instruction(emit);
2940 emit_dword(emit, opcode0.value);
2941 end_emit_instruction(emit);
2942 }
2943
2944
2945 /**
2946 * Emit domain shader declarations.
2947 */
2948 static void
2949 emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
2950 {
2951 VGPU10OpcodeToken0 opcode0;
2952
2953 assert(emit->unit == PIPE_SHADER_TESS_EVAL);
2954
2955 /* Emit the input control point count */
2956 assert(emit->key.tes.vertices_per_patch >= 0 &&
2957 emit->key.tes.vertices_per_patch <= 32);
2958
2959 opcode0.value = 0;
2960 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
2961 opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
2962 begin_emit_instruction(emit);
2963 emit_dword(emit, opcode0.value);
2964 end_emit_instruction(emit);
2965
2966 emit_tessellator_domain(emit, emit->tes.prim_mode);
2967 }
2968
2969
2970 /**
2971 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
2972 * to implement some instructions. We pre-allocate those values here
2973 * in the immediate constant buffer.
2974 */
2975 static void
2976 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
2977 {
2978 unsigned n = 0;
2979
2980 emit->common_immediate_pos[n++] =
2981 alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
2982
2983 if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
2984 emit->common_immediate_pos[n++] =
2985 alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
2986 }
2987
2988 emit->common_immediate_pos[n++] =
2989 alloc_immediate_int4(emit, 0, 1, 0, -1);
2990
2991 if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
2992 emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
2993 emit->common_immediate_pos[n++] =
2994 alloc_immediate_int4(emit, 31, 0, 0, 0);
2995 }
2996
2997 if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
2998 emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
2999 emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
3000 emit->common_immediate_pos[n++] =
3001 alloc_immediate_int4(emit, 32, 0, 0, 0);
3002 }
3003
3004 if (emit->key.vs.attrib_puint_to_snorm) {
3005 emit->common_immediate_pos[n++] =
3006 alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
3007 }
3008
3009 if (emit->key.vs.attrib_puint_to_uscaled) {
3010 emit->common_immediate_pos[n++] =
3011 alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
3012 }
3013
3014 if (emit->key.vs.attrib_puint_to_sscaled) {
3015 emit->common_immediate_pos[n++] =
3016 alloc_immediate_int4(emit, 22, 12, 2, 0);
3017
3018 emit->common_immediate_pos[n++] =
3019 alloc_immediate_int4(emit, 22, 30, 0, 0);
3020 }
3021
3022 if (emit->vposition.num_prescale > 1) {
3023 unsigned i;
3024 for (i = 0; i < emit->vposition.num_prescale; i+=4) {
3025 emit->common_immediate_pos[n++] =
3026 alloc_immediate_int4(emit, i, i+1, i+2, i+3);
3027 }
3028 }
3029
3030 emit->immediates_dbl = (double (*)[2]) emit->immediates;
3031
3032 if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
3033 emit->common_immediate_pos[n++] =
3034 alloc_immediate_double2(emit, -1.0, -1.0);
3035 }
3036
3037 if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) {
3038 emit->common_immediate_pos[n++] =
3039 alloc_immediate_double2(emit, 0.0, 0.0);
3040 emit->common_immediate_pos[n++] =
3041 alloc_immediate_double2(emit, 1.0, 1.0);
3042 }
3043
3044 if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
3045 emit->common_immediate_pos[n++] =
3046 alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
3047 }
3048
3049 assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3050
3051 unsigned i;
3052
3053 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
3054 if (emit->key.tex[i].texel_bias) {
3055 /* The unused 0.0f slots can be replaced if more float immediates are needed */
3056 emit->common_immediate_pos[n++] =
3057 alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
3058 break;
3059 }
3060 }
3061
3062 assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3063 emit->num_common_immediates = n;
3064 }
3065
3066
3067 /**
3068 * Emit hull shader declarations.
3069 */
3070 static void
3071 emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
3072 {
3073 VGPU10OpcodeToken0 opcode0;
3074
3075 /* Emit the input control point count */
3076 assert(emit->key.tcs.vertices_per_patch > 0 &&
3077 emit->key.tcs.vertices_per_patch <= 32);
3078
3079 opcode0.value = 0;
3080 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3081 opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
3082 begin_emit_instruction(emit);
3083 emit_dword(emit, opcode0.value);
3084 end_emit_instruction(emit);
3085
3086 /* Emit the output control point count */
3087 assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
3088
3089 opcode0.value = 0;
3090 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
3091 opcode0.controlPointCount = emit->key.tcs.vertices_out;
3092 begin_emit_instruction(emit);
3093 emit_dword(emit, opcode0.value);
3094 end_emit_instruction(emit);
3095
3096 /* Emit tessellator domain */
3097 emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
3098
3099 /* Emit tessellator output primitive */
3100 opcode0.value = 0;
3101 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
3102 if (emit->key.tcs.point_mode) {
3103 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
3104 }
3105 else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
3106 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
3107 }
3108 else {
3109 assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS ||
3110 emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES);
3111
3112 if (emit->key.tcs.vertices_order_cw)
3113 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
3114 else
3115 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
3116 }
3117 begin_emit_instruction(emit);
3118 emit_dword(emit, opcode0.value);
3119 end_emit_instruction(emit);
3120
3121 /* Emit tessellator partitioning */
3122 opcode0.value = 0;
3123 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
3124 switch (emit->key.tcs.spacing) {
3125 case PIPE_TESS_SPACING_FRACTIONAL_ODD:
3126 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
3127 break;
3128 case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
3129 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
3130 break;
3131 case PIPE_TESS_SPACING_EQUAL:
3132 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
3133 break;
3134 default:
3135 debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
3136 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
3137 }
3138 begin_emit_instruction(emit);
3139 emit_dword(emit, opcode0.value);
3140 end_emit_instruction(emit);
3141
3142 /* Declare constant registers */
3143 emit_constant_declaration(emit);
3144
3145 /* Declare samplers and resources */
3146 emit_sampler_declarations(emit);
3147 emit_resource_declarations(emit);
3148
3149 alloc_common_immediates(emit);
3150
3151 int nVertices = emit->key.tcs.vertices_per_patch;
3152 emit->tcs.imm_index =
3153 alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
3154
3155 /* Now, emit the constant block containing all the immediates
3156 * declared by the shader, as well as the extra ones seen above.
3157 */
3158 emit_vgpu10_immediates_block(emit);
3159
3160 }
3161
3162
3163 /**
3164 * A helper function to determine if the control point phase is needed.
3165 * Returns TRUE if there is control point output.
3166 */
3167 static boolean
3168 needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3169 {
3170 unsigned i;
3171
3172 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3173
3174 /* If output control point count does not match the input count,
3175 * we need a control point phase to explicitly set the output control
3176 * points.
3177 */
3178 if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3179 emit->key.tcs.vertices_out)
3180 return TRUE;
3181
3182 for (i = 0; i < emit->info.num_outputs; i++) {
3183 switch (emit->info.output_semantic_name[i]) {
3184 case TGSI_SEMANTIC_PATCH:
3185 case TGSI_SEMANTIC_TESSOUTER:
3186 case TGSI_SEMANTIC_TESSINNER:
3187 break;
3188 default:
3189 return TRUE;
3190 }
3191 }
3192 return FALSE;
3193 }
3194
3195
3196 /**
3197 * A helper function to add the shader signature for the passthrough control
3198 * point phase. The HLSL compiler also generates this signature for the
3199 * passthrough control point phase, and the Metal renderer needs it.
3200 */
3201 static void
3202 emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3203 {
3204 struct svga_shader_signature *sgn = &emit->signature;
3205 SVGA3dDXShaderSignatureEntry *sgnEntry;
3206 unsigned i;
3207
3208 for (i = 0; i < emit->info.num_inputs; i++) {
3209 unsigned index = emit->linkage.input_map[i];
3210 enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3211
3212 sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3213
3214 set_shader_signature_entry(sgnEntry, index,
3215 tgsi_semantic_to_sgn_name[sem_name],
3216 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3217 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3218 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3219
3220 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3221
3222 set_shader_signature_entry(sgnEntry, i,
3223 tgsi_semantic_to_sgn_name[sem_name],
3224 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3225 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3226 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3227 }
3228 }
3229
3230
3231 /**
3232 * A helper function to emit an instruction to start the control point phase
3233 * in the hull shader.
3234 */
3235 static void
3236 emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3237 {
3238 VGPU10OpcodeToken0 opcode0;
3239
3240 opcode0.value = 0;
3241 opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3242 begin_emit_instruction(emit);
3243 emit_dword(emit, opcode0.value);
3244 end_emit_instruction(emit);
3245 }
3246
3247
3248 /**
3249 * Start the hull shader control point phase
3250 */
3251 static boolean
3252 emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
3253 {
3254 /* If there is no control point output, skip the control point phase. */
3255 if (!needs_control_point_phase(emit)) {
3256 if (!emit->key.tcs.vertices_out) {
3257 /**
3258 * If the tcs does not explicitly generate any control point output
3259 * and the tes does not use any input control point, then
3260 * emit an empty control point phase with zero output control
3261 * point count.
3262 */
3263 emit_control_point_phase_instruction(emit);
3264
3265 /**
3266 * Since this is an empty control point phase, we will need to
3267 * add input signatures when we parse the tcs again in the
3268 * patch constant phase.
3269 */
3270 emit->tcs.fork_phase_add_signature = TRUE;
3271 }
3272 else {
3273 /**
3274 * Before skipping the control point phase, add the signature for
3275 * the passthrough control point.
3276 */
3277 emit_passthrough_control_point_signature(emit);
3278 }
3279 return FALSE;
3280 }
3281
3282 /* Start the control point phase in the hull shader */
3283 emit_control_point_phase_instruction(emit);
3284
3285 /* Declare the output control point ID */
3286 if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
3287 /* Add invocation id declaration if it does not exist */
3288 emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
3289 }
3290
3291 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3292 VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
3293 VGPU10_OPERAND_INDEX_0D,
3294 0, 1,
3295 VGPU10_NAME_UNDEFINED,
3296 VGPU10_OPERAND_0_COMPONENT, 0,
3297 0,
3298 VGPU10_INTERPOLATION_CONSTANT, TRUE,
3299 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3300
3301 if (emit->tcs.prim_id_index != INVALID_INDEX) {
3302 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3303 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3304 VGPU10_OPERAND_INDEX_0D,
3305 0, 1,
3306 VGPU10_NAME_UNDEFINED,
3307 VGPU10_OPERAND_0_COMPONENT,
3308 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3309 0,
3310 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3311 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3312 }
3313
3314 return TRUE;
3315 }
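/*
 * Sketch of the token stream produced when a control point phase is
 * needed (illustrative only, assuming the usual DX-style disassembly
 * names for these registers):
 *   hs_control_point_phase
 *   dcl_input vOutputControlPointID
 *   dcl_input vPrim          (only if the TCS reads the primitive id)
 */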
3316
3317
3318 /**
3319 * Start the hull shader patch constant phase and
3320 * do the second pass of the tcs translation and emit
3321 * the relevant declarations and instructions for this phase.
3322 */
3323 static boolean
3324 emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
3325 struct tgsi_parse_context *parse)
3326 {
3327 unsigned inst_number = 0;
3328 boolean ret = TRUE;
3329 VGPU10OpcodeToken0 opcode0;
3330
3331 emit->skip_instruction = FALSE;
3332
3333 /* Start the patch constant phase */
3334 opcode0.value = 0;
3335 opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
3336 begin_emit_instruction(emit);
3337 emit_dword(emit, opcode0.value);
3338 end_emit_instruction(emit);
3339
3340 /* Set the current phase to patch constant phase */
3341 emit->tcs.control_point_phase = FALSE;
3342
3343 if (emit->tcs.prim_id_index != INVALID_INDEX) {
3344 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3345 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3346 VGPU10_OPERAND_INDEX_0D,
3347 0, 1,
3348 VGPU10_NAME_UNDEFINED,
3349 VGPU10_OPERAND_0_COMPONENT,
3350 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3351 0,
3352 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3353 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3354 }
3355
3356 /* Emit declarations for this phase */
3357 emit->index_range.required =
3358 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
3359 emit_tcs_input_declarations(emit);
3360
3361 if (emit->index_range.start_index != INVALID_INDEX) {
3362 emit_index_range_declaration(emit);
3363 }
3364
3365 emit->index_range.required =
3366 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
3367 emit_tcs_output_declarations(emit);
3368
3369 if (emit->index_range.start_index != INVALID_INDEX) {
3370 emit_index_range_declaration(emit);
3371 }
3372 emit->index_range.required = FALSE;
3373
3374 emit_temporaries_declaration(emit);
3375
3376 /* Reset the token position to the first instruction token
3377 * in preparation for the second pass of the shader
3378 */
3379 parse->Position = emit->tcs.instruction_token_pos;
3380
3381 while (!tgsi_parse_end_of_tokens(parse)) {
3382 tgsi_parse_token(parse);
3383
3384 assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
3385 ret = emit_vgpu10_instruction(emit, inst_number++,
3386 &parse->FullToken.FullInstruction);
3387
3388       /* Usually this applies to TCS only. If the shader reads a patch
3389        * constant output in the fork phase, we re-emit every instruction
3390        * that writes a patch constant output in the fork phase so that the
3391        * results are also stored in temporaries.
3392        */
3393 if (emit->reemit_instruction) {
3394 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3395 ret = emit_vgpu10_instruction(emit, inst_number,
3396 &parse->FullToken.FullInstruction);
3397 }
3398
3399 if (!ret)
3400 return FALSE;
3401 }
3402
3403 return TRUE;
3404 }
3405
3406
3407 /**
3408 * Emit index range declaration.
3409 */
3410 static boolean
3411 emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3412 {
3413 if (emit->version < 50)
3414 return TRUE;
3415
3416 assert(emit->index_range.start_index != INVALID_INDEX);
3417 assert(emit->index_range.count != 0);
3418 assert(emit->index_range.required);
3419 assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3420 assert(emit->index_range.dim != 0);
3421 assert(emit->index_range.size != 0);
3422
3423 VGPU10OpcodeToken0 opcode0;
3424 VGPU10OperandToken0 operand0;
3425
3426 opcode0.value = 0;
3427 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3428
3429 operand0.value = 0;
3430 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3431 operand0.indexDimension = emit->index_range.dim;
3432 operand0.operandType = emit->index_range.operandType;
3433 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3434 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3435
3436 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3437 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3438
3439 begin_emit_instruction(emit);
3440 emit_dword(emit, opcode0.value);
3441 emit_dword(emit, operand0.value);
3442
3443 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3444 emit_dword(emit, emit->index_range.size);
3445 emit_dword(emit, emit->index_range.start_index);
3446 emit_dword(emit, emit->index_range.count);
3447 }
3448 else {
3449 emit_dword(emit, emit->index_range.start_index);
3450 emit_dword(emit, emit->index_range.count);
3451 }
3452
3453 end_emit_instruction(emit);
3454
3455 /* Reset fields in emit->index_range struct except
3456 * emit->index_range.required which will be reset afterwards
3457 */
3458 emit->index_range.count = 0;
3459 emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3460 emit->index_range.start_index = INVALID_INDEX;
3461 emit->index_range.size = 0;
3462 emit->index_range.dim = 0;
3463
3464 return TRUE;
3465 }
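/*
 * Illustrative example (not from the original source): for a 1D input
 * range covering registers v4..v6 the tokens emitted above are
 *   { opcode0 (dcl_indexRange), operand0 (INPUT, 1D), 4, 3 }
 * i.e. start_index = 4 followed by count = 3.  For a 2D range the
 * register size is emitted before the start index and count.
 */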
3466
3467
3468 /**
3469 * Emit a vgpu10 declaration "instruction".
3470 * \param index the register index
3471 * \param size array size of the operand. In most cases, it is 1,
3472 * but for inputs to geometry shader, the array size varies
3473 * depending on the primitive type.
3474 */
3475 static void
3476 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3477 VGPU10OpcodeToken0 opcode0,
3478 VGPU10OperandToken0 operand0,
3479 VGPU10NameToken name_token,
3480 unsigned index, unsigned size)
3481 {
3482 assert(opcode0.opcodeType);
3483 assert(operand0.mask ||
3484 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3485 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3486 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3487 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3488 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3489 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3490 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3491 (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3492
3493 begin_emit_instruction(emit);
3494 emit_dword(emit, opcode0.value);
3495
3496 emit_dword(emit, operand0.value);
3497
3498 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3499 /* Next token is the index of the register to declare */
3500 emit_dword(emit, index);
3501 }
3502 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3503 /* Next token is the size of the register */
3504 emit_dword(emit, size);
3505
3506 /* Followed by the index of the register */
3507 emit_dword(emit, index);
3508 }
3509
3510 if (name_token.value) {
3511 emit_dword(emit, name_token.value);
3512 }
3513
3514 end_emit_instruction(emit);
3515 }
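/*
 * Illustrative example (not from the original source): a 1D output
 * declaration such as "dcl_output o3.xyzw" is emitted as
 *   { opcode0, operand0, 3 }
 * while a 2D GS input declaration spanning six vertices, roughly
 * "dcl_input v[6][2]", is emitted as
 *   { opcode0, operand0, 6 (size), 2 (index) }
 * with an optional trailing name token for SIV/SGV declarations.
 */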
3516
3517
3518 /**
3519 * Emit the declaration for a shader input.
3520 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx
3521 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
3522 * \param dim index dimension
3523 * \param index the input register index
3524 * \param size array size of the operand. In most cases, it is 1,
3525 * but for inputs to geometry shader, the array size varies
3526 * depending on the primitive type. For tessellation control
3527 * shader, the array size is the vertex count per patch.
3528 * \param name one of VGPU10_NAME_x
3529  * \param numComp number of components
3530 * \param selMode component selection mode
3531 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3532 * \param interpMode interpolation mode
3533 */
3534 static void
3535 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
3536 VGPU10_OPCODE_TYPE opcodeType,
3537 VGPU10_OPERAND_TYPE operandType,
3538 VGPU10_OPERAND_INDEX_DIMENSION dim,
3539 unsigned index, unsigned size,
3540 VGPU10_SYSTEM_NAME name,
3541 VGPU10_OPERAND_NUM_COMPONENTS numComp,
3542 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
3543 unsigned usageMask,
3544 VGPU10_INTERPOLATION_MODE interpMode,
3545 boolean addSignature,
3546 SVGA3dDXSignatureSemanticName sgnName)
3547 {
3548 VGPU10OpcodeToken0 opcode0;
3549 VGPU10OperandToken0 operand0;
3550 VGPU10NameToken name_token;
3551
3552 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3553 assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
3554 opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
3555 opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
3556 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
3557 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
3558 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
3559 assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
3560 operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
3561 operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
3562 operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
3563 operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
3564 operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
3565 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
3566 operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
3567 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
3568 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
3569 operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
3570
3571 assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
3572 assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
3573 assert(dim <= VGPU10_OPERAND_INDEX_3D);
3574 assert(name == VGPU10_NAME_UNDEFINED ||
3575 name == VGPU10_NAME_POSITION ||
3576 name == VGPU10_NAME_INSTANCE_ID ||
3577 name == VGPU10_NAME_VERTEX_ID ||
3578 name == VGPU10_NAME_PRIMITIVE_ID ||
3579 name == VGPU10_NAME_IS_FRONT_FACE ||
3580 name == VGPU10_NAME_SAMPLE_INDEX ||
3581 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3582 name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
3583
3584 assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
3585 interpMode == VGPU10_INTERPOLATION_CONSTANT ||
3586 interpMode == VGPU10_INTERPOLATION_LINEAR ||
3587 interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
3588 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
3589 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
3590 interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
3591 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
3592
3593 check_register_index(emit, opcodeType, index);
3594
3595 opcode0.value = operand0.value = name_token.value = 0;
3596
3597 opcode0.opcodeType = opcodeType;
3598 opcode0.interpolationMode = interpMode;
3599
3600 operand0.operandType = operandType;
3601 operand0.numComponents = numComp;
3602 operand0.selectionMode = selMode;
3603 operand0.mask = usageMask;
3604 operand0.indexDimension = dim;
3605 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3606 if (dim == VGPU10_OPERAND_INDEX_2D)
3607 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3608
3609 name_token.name = name;
3610
3611 emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
3612
3613 if (addSignature) {
3614 struct svga_shader_signature *sgn = &emit->signature;
3615 if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
3616 /* Set patch constant signature */
3617 SVGA3dDXShaderSignatureEntry *sgnEntry =
3618 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
3619 set_shader_signature_entry(sgnEntry, index,
3620 sgnName, usageMask,
3621 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3622 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3623
3624 } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
3625 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
3626 /* Set input signature */
3627 SVGA3dDXShaderSignatureEntry *sgnEntry =
3628 &sgn->inputs[sgn->header.numInputSignatures++];
3629 set_shader_signature_entry(sgnEntry, index,
3630 sgnName, usageMask,
3631 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3632 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3633 }
3634 }
3635
3636 if (emit->index_range.required) {
3637 /* Here, index_range declaration is only applicable for opcodeType
3638 * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
3639 * for operandType VGPU10_OPERAND_TYPE_INPUT,
3640 * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
3641 * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
3642 */
3643 if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
3644 opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
3645 (operandType != VGPU10_OPERAND_TYPE_INPUT &&
3646 operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
3647 operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
3648 if (emit->index_range.start_index != INVALID_INDEX) {
3649 emit_index_range_declaration(emit);
3650 }
3651 return;
3652 }
3653
3654 if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3655          /* Need to record a new index_range */
3656 emit->index_range.count = 1;
3657 emit->index_range.operandType = operandType;
3658 emit->index_range.start_index = index;
3659 emit->index_range.size = size;
3660 emit->index_range.dim = dim;
3661 }
3662 else if (index !=
3663 (emit->index_range.start_index + emit->index_range.count) ||
3664 emit->index_range.operandType != operandType) {
3665 /* Input index is not contiguous with index range or operandType is
3666 * different from index range's operandType. We need to emit current
3667 * index_range first and then start recording next index range.
3668 */
3669 emit_index_range_declaration(emit);
3670
3671 emit->index_range.count = 1;
3672 emit->index_range.operandType = operandType;
3673 emit->index_range.start_index = index;
3674 emit->index_range.size = size;
3675 emit->index_range.dim = dim;
3676 }
3677 else if (emit->index_range.operandType == operandType) {
3678 /* Since input index is contiguous with index range and operandType
3679 * is same as index range's operandType, increment index range count.
3680 */
3681 emit->index_range.count++;
3682 }
3683 }
3684 }
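/*
 * Illustrative example (not from the original source): with
 * index_range.required set, declaring contiguous inputs v2, v3 and v4 of
 * the same operand type grows a single pending range (start_index = 2,
 * count = 3); a gap in the indices or a change of operand type flushes
 * the pending range through emit_index_range_declaration() and starts a
 * new one.
 */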
3685
3686
3687 /**
3688 * Emit the declaration for a shader output.
3689 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx
3690 * \param index the output register index
3691 * \param name one of VGPU10_NAME_x
3692 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3693 */
3694 static void
3695 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
3696 VGPU10_OPCODE_TYPE type, unsigned index,
3697 VGPU10_SYSTEM_NAME name,
3698 unsigned writemask,
3699 boolean addSignature,
3700 SVGA3dDXSignatureSemanticName sgnName)
3701 {
3702 VGPU10OpcodeToken0 opcode0;
3703 VGPU10OperandToken0 operand0;
3704 VGPU10NameToken name_token;
3705
3706 assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3707 assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
3708 type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
3709 type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
3710 assert(name == VGPU10_NAME_UNDEFINED ||
3711 name == VGPU10_NAME_POSITION ||
3712 name == VGPU10_NAME_PRIMITIVE_ID ||
3713 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3714 name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
3715 name == VGPU10_NAME_CLIP_DISTANCE);
3716
3717 check_register_index(emit, type, index);
3718
3719 opcode0.value = operand0.value = name_token.value = 0;
3720
3721 opcode0.opcodeType = type;
3722 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3723 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3724 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
3725 operand0.mask = writemask;
3726 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
3727 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3728
3729 name_token.name = name;
3730
3731 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
3732
3733 /* Capture output signature */
3734 if (addSignature) {
3735 struct svga_shader_signature *sgn = &emit->signature;
3736 SVGA3dDXShaderSignatureEntry *sgnEntry =
3737 &sgn->outputs[sgn->header.numOutputSignatures++];
3738 set_shader_signature_entry(sgnEntry, index,
3739 sgnName, writemask,
3740 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3741 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3742 }
3743
3744 if (emit->index_range.required) {
3745 /* Here, index_range declaration is only applicable for opcodeType
3746 * VGPU10_OPCODE_DCL_OUTPUT and for operandType
3747 * VGPU10_OPERAND_TYPE_OUTPUT.
3748 */
3749 if (type != VGPU10_OPCODE_DCL_OUTPUT) {
3750 if (emit->index_range.start_index != INVALID_INDEX) {
3751 emit_index_range_declaration(emit);
3752 }
3753 return;
3754 }
3755
3756 if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3757          /* Need to record a new index_range */
3758 emit->index_range.count = 1;
3759 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3760 emit->index_range.start_index = index;
3761 emit->index_range.size = 1;
3762 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3763 }
3764 else if (index !=
3765 (emit->index_range.start_index + emit->index_range.count)) {
3766 /* Output index is not contiguous with index range. We need to
3767 * emit current index_range first and then start recording next
3768 * index range.
3769 */
3770 emit_index_range_declaration(emit);
3771
3772 emit->index_range.count = 1;
3773 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3774 emit->index_range.start_index = index;
3775 emit->index_range.size = 1;
3776 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3777 }
3778 else {
3779 /* Since output index is contiguous with index range, increment
3780 * index range count.
3781 */
3782 emit->index_range.count++;
3783 }
3784 }
3785 }
3786
3787
3788 /**
3789 * Emit the declaration for the fragment depth output.
3790 */
3791 static void
3792 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
3793 {
3794 VGPU10OpcodeToken0 opcode0;
3795 VGPU10OperandToken0 operand0;
3796 VGPU10NameToken name_token;
3797
3798 assert(emit->unit == PIPE_SHADER_FRAGMENT);
3799
3800 opcode0.value = operand0.value = name_token.value = 0;
3801
3802 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3803 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
3804 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
3805 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3806 operand0.mask = 0;
3807
3808 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3809 }
3810
3811
3812 /**
3813 * Emit the declaration for the fragment sample mask/coverage output.
3814 */
3815 static void
3816 emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
3817 {
3818 VGPU10OpcodeToken0 opcode0;
3819 VGPU10OperandToken0 operand0;
3820 VGPU10NameToken name_token;
3821
3822 assert(emit->unit == PIPE_SHADER_FRAGMENT);
3823 assert(emit->version >= 41);
3824
3825 opcode0.value = operand0.value = name_token.value = 0;
3826
3827 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3828 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
3829 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
3830 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3831 operand0.mask = 0;
3832
3833 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3834 }
3835
3836
3837 /**
3838 * Emit output declarations for fragment shader.
3839 */
3840 static void
3841 emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
3842 {
3843 unsigned int i;
3844
3845 for (i = 0; i < emit->info.num_outputs; i++) {
3846 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
3847 const enum tgsi_semantic semantic_name =
3848 emit->info.output_semantic_name[i];
3849 const unsigned semantic_index = emit->info.output_semantic_index[i];
3850 unsigned index = i;
3851
3852 if (semantic_name == TGSI_SEMANTIC_COLOR) {
3853 assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
3854
3855 emit->fs.color_out_index[semantic_index] = index;
3856
3857 emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
3858 index + 1);
3859
3860 /* The semantic index is the shader's color output/buffer index */
3861 emit_output_declaration(emit,
3862 VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
3863 VGPU10_NAME_UNDEFINED,
3864 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3865 TRUE,
3866 map_tgsi_semantic_to_sgn_name(semantic_name));
3867
3868 if (semantic_index == 0) {
3869 if (emit->key.fs.write_color0_to_n_cbufs > 1) {
3870 /* Emit declarations for the additional color outputs
3871 * for broadcasting.
3872 */
3873 unsigned j;
3874 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
3875 /* Allocate a new output index */
3876 unsigned idx = emit->info.num_outputs + j - 1;
3877 emit->fs.color_out_index[j] = idx;
3878 emit_output_declaration(emit,
3879 VGPU10_OPCODE_DCL_OUTPUT, idx,
3880 VGPU10_NAME_UNDEFINED,
3881 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3882 TRUE,
3883 map_tgsi_semantic_to_sgn_name(semantic_name));
3884 emit->info.output_semantic_index[idx] = j;
3885 }
3886
3887 emit->fs.num_color_outputs =
3888 emit->key.fs.write_color0_to_n_cbufs;
3889 }
3890 }
3891 }
3892 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
3893 /* Fragment depth output */
3894 emit_fragdepth_output_declaration(emit);
3895 }
3896 else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
3897 /* Sample mask output */
3898 emit_samplemask_output_declaration(emit);
3899 }
3900 else {
3901 assert(!"Bad output semantic name");
3902 }
3903 }
3904 }
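/*
 * Illustrative example (not from the original source): a fragment shader
 * with a single TGSI COLOR output and key.fs.write_color0_to_n_cbufs = 4
 * ends up declaring o0..o3, and fs.color_out_index[0..3] records the
 * extra registers used to broadcast color0 to all four color buffers.
 */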
3905
3906
3907 /**
3908 * Emit common output declaration for vertex processing.
3909 */
3910 static void
3911 emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
3912 unsigned index, unsigned writemask,
3913 boolean addSignature)
3914 {
3915 const enum tgsi_semantic semantic_name =
3916 emit->info.output_semantic_name[index];
3917 const unsigned semantic_index = emit->info.output_semantic_index[index];
3918 unsigned name, type;
3919 unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3920
3921 assert(emit->unit != PIPE_SHADER_FRAGMENT &&
3922 emit->unit != PIPE_SHADER_COMPUTE);
3923
3924 switch (semantic_name) {
3925 case TGSI_SEMANTIC_POSITION:
3926 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
3927 /* position will be declared in control point only */
3928 assert(emit->tcs.control_point_phase);
3929 type = VGPU10_OPCODE_DCL_OUTPUT;
3930 name = VGPU10_NAME_UNDEFINED;
3931 emit_output_declaration(emit, type, index, name, final_mask, TRUE,
3932 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3933 return;
3934 }
3935 else {
3936 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3937 name = VGPU10_NAME_POSITION;
3938 }
3939 /* Save the index of the vertex position output register */
3940 emit->vposition.out_index = index;
3941 break;
3942 case TGSI_SEMANTIC_CLIPDIST:
3943 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3944 name = VGPU10_NAME_CLIP_DISTANCE;
3945 /* save the starting index of the clip distance output register */
3946 if (semantic_index == 0)
3947 emit->clip_dist_out_index = index;
3948 final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
3949 if (final_mask == 0x0)
3950 return; /* discard this do-nothing declaration */
3951 break;
3952 case TGSI_SEMANTIC_CLIPVERTEX:
3953 type = VGPU10_OPCODE_DCL_OUTPUT;
3954 name = VGPU10_NAME_UNDEFINED;
3955 emit->clip_vertex_out_index = index;
3956 break;
3957 default:
3958 /* generic output */
3959 type = VGPU10_OPCODE_DCL_OUTPUT;
3960 name = VGPU10_NAME_UNDEFINED;
3961 }
3962
3963 emit_output_declaration(emit, type, index, name, final_mask, addSignature,
3964 map_tgsi_semantic_to_sgn_name(semantic_name));
3965 }
3966
3967
3968 /**
3969 * Emit declaration for outputs in vertex shader.
3970 */
3971 static void
3972 emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
3973 {
3974 unsigned i;
3975 for (i = 0; i < emit->info.num_outputs; i++) {
3976 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
3977 }
3978 }
3979
3980
3981 /**
3982 * A helper function to determine the writemask for an output
3983 * for the specified stream.
3984 */
3985 static unsigned
3986 output_writemask_for_stream(unsigned stream, ubyte output_streams,
3987 ubyte output_usagemask)
3988 {
3989 unsigned i;
3990 unsigned writemask = 0;
3991
3992 for (i = 0; i < 4; i++) {
3993 if ((output_streams & 0x3) == stream)
3994 writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
3995 output_streams >>= 2;
3996 }
3997 return writemask & output_usagemask;
3998 }
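/*
 * Illustrative example (not from the original source): output_streams
 * packs a 2-bit stream index per component, with component X in the
 * lowest bits.  With output_streams = 0x04 (Y assigned to stream 1,
 * X/Z/W to stream 0) and output_usagemask = 0xf, stream 0 yields a
 * writemask of XZW and stream 1 yields Y.
 */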
3999
4000
4001 /**
4002 * Emit declaration for outputs in geometry shader.
4003 */
4004 static void
4005 emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
4006 {
4007 unsigned i;
4008 VGPU10OpcodeToken0 opcode0;
4009 unsigned numStreamsSupported = 1;
4010 int s;
4011
4012 if (emit->version >= 50) {
4013 numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
4014 }
4015
4016    /**
4017     * Emit the streams in reverse order so that we finish with
4018     * stream 0; that way any auxiliary output declarations go to
4019     * stream 0.
4020     */
4021 for (s = numStreamsSupported-1; s >= 0; s--) {
4022
4023 if (emit->info.num_stream_output_components[s] == 0)
4024 continue;
4025
4026 if (emit->version >= 50) {
4027 /* DCL_STREAM stream */
4028 begin_emit_instruction(emit);
4029 emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE);
4030 emit_stream_register(emit, s);
4031 end_emit_instruction(emit);
4032 }
4033
4034 /* emit output primitive topology declaration */
4035 opcode0.value = 0;
4036 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
4037 opcode0.primitiveTopology = emit->gs.prim_topology;
4038 emit_property_instruction(emit, opcode0, 0, 0);
4039
4040 for (i = 0; i < emit->info.num_outputs; i++) {
4041 unsigned writemask;
4042
4043 /* find out the writemask for this stream */
4044 writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
4045 emit->output_usage_mask[i]);
4046
4047 if (writemask) {
4048 enum tgsi_semantic semantic_name =
4049 emit->info.output_semantic_name[i];
4050
4051 /* TODO: Still need to take care of a special case where a
4052 * single varying spans across multiple output registers.
4053 */
4054 switch(semantic_name) {
4055 case TGSI_SEMANTIC_PRIMID:
4056 emit_output_declaration(emit,
4057 VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
4058 VGPU10_NAME_PRIMITIVE_ID,
4059 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4060 FALSE,
4061 map_tgsi_semantic_to_sgn_name(semantic_name));
4062 break;
4063 case TGSI_SEMANTIC_LAYER:
4064 emit_output_declaration(emit,
4065 VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4066 VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
4067 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4068 FALSE,
4069 map_tgsi_semantic_to_sgn_name(semantic_name));
4070 break;
4071 case TGSI_SEMANTIC_VIEWPORT_INDEX:
4072 emit_output_declaration(emit,
4073 VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4074 VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
4075 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4076 FALSE,
4077 map_tgsi_semantic_to_sgn_name(semantic_name));
4078 emit->gs.viewport_index_out_index = i;
4079 break;
4080 default:
4081 emit_vertex_output_declaration(emit, i, writemask, FALSE);
4082 }
4083 }
4084 }
4085 }
4086
4087    /* For geometry shader outputs, the same register may be declared
4088     * multiple times for different streams. To avoid redundant signature
4089     * entries, the geometry shader output signature is built outside of
4090     * the declarations.
4091     */
4092 struct svga_shader_signature *sgn = &emit->signature;
4093 SVGA3dDXShaderSignatureEntry *sgnEntry;
4094
4095 for (i = 0; i < emit->info.num_outputs; i++) {
4096 if (emit->output_usage_mask[i]) {
4097 enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];
4098
4099 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
4100 set_shader_signature_entry(sgnEntry, i,
4101 map_tgsi_semantic_to_sgn_name(sem_name),
4102 emit->output_usage_mask[i],
4103 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4104 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4105 }
4106 }
4107 }
4108
4109
4110 /**
4111 * Emit the declaration for the tess inner/outer output.
4112 * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4113 * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4114 * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4115 */
4116 static void
4117 emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4118 unsigned index, unsigned opcodeType,
4119 unsigned operandType, VGPU10_SYSTEM_NAME name,
4120 SVGA3dDXSignatureSemanticName sgnName)
4121 {
4122 VGPU10OpcodeToken0 opcode0;
4123 VGPU10OperandToken0 operand0;
4124 VGPU10NameToken name_token;
4125
4126 assert(emit->version >= 50);
4127 assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4128 (emit->key.tcs.prim_mode == PIPE_PRIM_LINES &&
4129 name == VGPU10_NAME_UNDEFINED));
4130 assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4131
4132 assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4133 operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4134
4135 opcode0.value = operand0.value = name_token.value = 0;
4136
4137 opcode0.opcodeType = opcodeType;
4138 operand0.operandType = operandType;
4139 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4140 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4141 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4142 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4143 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4144
4145 name_token.name = name;
4146 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4147
4148 /* Capture patch constant signature */
4149 struct svga_shader_signature *sgn = &emit->signature;
4150 SVGA3dDXShaderSignatureEntry *sgnEntry =
4151 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4152 set_shader_signature_entry(sgnEntry, index,
4153 sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4154 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4155 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4156 }
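/*
 * Illustrative note (not from the original source): each call declares
 * one scalar tess factor in the .x component of its register, so a quad
 * domain uses six of these declarations (4 edge + 2 inside), a triangle
 * domain four (3 edge + 1 inside), and an isoline domain two.
 */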
4157
4158
4159 /**
4160 * Emit output declarations for tessellation control shader.
4161 */
4162 static void
4163 emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
4164 {
4165 unsigned int i;
4166 unsigned outputIndex = emit->num_outputs;
4167 struct svga_shader_signature *sgn = &emit->signature;
4168
4169    /**
4170     * Reset patch_generic_out_count so it is not counted twice, since this
4171     * function is called twice: once for the control point phase and once
4172     * for the patch constant phase.
4173     */
4174 emit->tcs.patch_generic_out_count = 0;
4175
4176 for (i = 0; i < emit->info.num_outputs; i++) {
4177 unsigned index = i;
4178 const enum tgsi_semantic semantic_name =
4179 emit->info.output_semantic_name[i];
4180
4181 switch (semantic_name) {
4182 case TGSI_SEMANTIC_TESSINNER:
4183 emit->tcs.inner.tgsi_index = i;
4184
4185 /* skip per-patch output declarations in control point phase */
4186 if (emit->tcs.control_point_phase)
4187 break;
4188
4189 emit->tcs.inner.out_index = outputIndex;
4190 switch (emit->key.tcs.prim_mode) {
4191 case PIPE_PRIM_QUADS:
4192 emit_tesslevel_declaration(emit, outputIndex++,
4193 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4194 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4195 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4196
4197 emit_tesslevel_declaration(emit, outputIndex++,
4198 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4199 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4200 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4201 break;
4202 case PIPE_PRIM_TRIANGLES:
4203 emit_tesslevel_declaration(emit, outputIndex++,
4204 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4205 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4206 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4207 break;
4208 case PIPE_PRIM_LINES:
4209 break;
4210 default:
4211 debug_printf("Unsupported primitive type");
4212 }
4213 break;
4214
4215 case TGSI_SEMANTIC_TESSOUTER:
4216 emit->tcs.outer.tgsi_index = i;
4217
4218 /* skip per-patch output declarations in control point phase */
4219 if (emit->tcs.control_point_phase)
4220 break;
4221
4222 emit->tcs.outer.out_index = outputIndex;
4223 switch (emit->key.tcs.prim_mode) {
4224 case PIPE_PRIM_QUADS:
4225 for (int j = 0; j < 4; j++) {
4226 emit_tesslevel_declaration(emit, outputIndex++,
4227 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4228 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
4229 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
4230 }
4231 break;
4232 case PIPE_PRIM_TRIANGLES:
4233 for (int j = 0; j < 3; j++) {
4234 emit_tesslevel_declaration(emit, outputIndex++,
4235 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4236 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
4237 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
4238 }
4239 break;
4240 case PIPE_PRIM_LINES:
4241 for (int j = 0; j < 2; j++) {
4242 emit_tesslevel_declaration(emit, outputIndex++,
4243 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4244 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
4245 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
4246 }
4247 break;
4248 default:
4249 debug_printf("Unsupported primitive type");
4250 }
4251 break;
4252
4253 case TGSI_SEMANTIC_PATCH:
4254 if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
4255          emit->tcs.patch_generic_out_index = i;
4256 emit->tcs.patch_generic_out_count++;
4257
4258 /* skip per-patch output declarations in control point phase */
4259 if (emit->tcs.control_point_phase)
4260 break;
4261
4262 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
4263 VGPU10_NAME_UNDEFINED,
4264 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4265 FALSE,
4266 map_tgsi_semantic_to_sgn_name(semantic_name));
4267
4268 SVGA3dDXShaderSignatureEntry *sgnEntry =
4269 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4270 set_shader_signature_entry(sgnEntry, index,
4271 map_tgsi_semantic_to_sgn_name(semantic_name),
4272 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4273 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4274 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4275
4276 break;
4277
4278 default:
4279 /* save the starting index of control point outputs */
4280 if (emit->tcs.control_point_out_index == INVALID_INDEX)
4281 emit->tcs.control_point_out_index = i;
4282 emit->tcs.control_point_out_count++;
4283
4284 /* skip control point output declarations in patch constant phase */
4285 if (!emit->tcs.control_point_phase)
4286 break;
4287
4288 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
4289 TRUE);
4290
4291 }
4292 }
4293
4294 if (emit->tcs.control_point_phase) {
4295 /**
4296 * Add missing control point output in control point phase.
4297 */
4298 if (emit->tcs.control_point_out_index == INVALID_INDEX) {
4299 /* use register index after tessellation factors */
4300 switch (emit->key.tcs.prim_mode) {
4301 case PIPE_PRIM_QUADS:
4302 emit->tcs.control_point_out_index = outputIndex + 6;
4303 break;
4304 case PIPE_PRIM_TRIANGLES:
4305 emit->tcs.control_point_out_index = outputIndex + 4;
4306 break;
4307 default:
4308 emit->tcs.control_point_out_index = outputIndex + 2;
4309 break;
4310 }
4311 emit->tcs.control_point_out_count++;
4312 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
4313 emit->tcs.control_point_out_index,
4314 VGPU10_NAME_POSITION,
4315 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4316 TRUE,
4317 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4318
4319 /* If tcs does not output any control point output,
4320 * we can end the hull shader control point phase here
4321 * after emitting the default control point output.
4322 */
4323 emit->skip_instruction = TRUE;
4324 }
4325 }
4326 else {
4327 if (emit->tcs.outer.out_index == INVALID_INDEX) {
4328          /* Since the TCS did not declare an outer tess level output register,
4329           * declare it here for the patch constant phase only.
4330           */
4331 emit->tcs.outer.out_index = outputIndex;
4332 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4333 for (int i = 0; i < 4; i++) {
4334 emit_tesslevel_declaration(emit, outputIndex++,
4335 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4336 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4337 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4338 }
4339 }
4340 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4341 for (int i = 0; i < 3; i++) {
4342 emit_tesslevel_declaration(emit, outputIndex++,
4343 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4344 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
4345 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
4346 }
4347 }
4348 }
4349
4350 if (emit->tcs.inner.out_index == INVALID_INDEX) {
4351          /* Since the TCS did not declare an inner tess level output register,
4352           * declare it here.
4353           */
4354 emit->tcs.inner.out_index = outputIndex;
4355 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4356 emit_tesslevel_declaration(emit, outputIndex++,
4357 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4358 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4359 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4360 emit_tesslevel_declaration(emit, outputIndex++,
4361 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4362 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4363 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4364 }
4365 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4366 emit_tesslevel_declaration(emit, outputIndex++,
4367 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4368 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4369 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4370 }
4371 }
4372 }
4373 emit->num_outputs = outputIndex;
4374 }
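/*
 * Illustrative note (not from the original source): when the TCS declares
 * no control point output at all, the control point phase above
 * synthesizes a POSITION output just past the registers reserved for the
 * tess factors (outputIndex + 6 for quads, + 4 for triangles, + 2
 * otherwise) and then sets skip_instruction so the control point phase
 * can end right after emitting that default output.
 */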
4375
4376
4377 /**
4378 * Emit output declarations for tessellation evaluation shader.
4379 */
4380 static void
4381 emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4382 {
4383 unsigned int i;
4384
4385 for (i = 0; i < emit->info.num_outputs; i++) {
4386 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
4387 }
4388 }
4389
4390
4391 /**
4392 * Emit the declaration for a system value input/output.
4393 */
4394 static void
4395 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
4396 enum tgsi_semantic semantic_name, unsigned index)
4397 {
4398 switch (semantic_name) {
4399 case TGSI_SEMANTIC_INSTANCEID:
4400 index = alloc_system_value_index(emit, index);
4401 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4402 VGPU10_OPERAND_TYPE_INPUT,
4403 VGPU10_OPERAND_INDEX_1D,
4404 index, 1,
4405 VGPU10_NAME_INSTANCE_ID,
4406 VGPU10_OPERAND_4_COMPONENT,
4407 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4408 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4409 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4410 map_tgsi_semantic_to_sgn_name(semantic_name));
4411 break;
4412 case TGSI_SEMANTIC_VERTEXID:
4413 emit->vs.vertex_id_sys_index = index;
4414 index = alloc_system_value_index(emit, index);
4415 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4416 VGPU10_OPERAND_TYPE_INPUT,
4417 VGPU10_OPERAND_INDEX_1D,
4418 index, 1,
4419 VGPU10_NAME_VERTEX_ID,
4420 VGPU10_OPERAND_4_COMPONENT,
4421 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4422 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4423 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4424 map_tgsi_semantic_to_sgn_name(semantic_name));
4425 break;
4426 case TGSI_SEMANTIC_SAMPLEID:
4427 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4428 emit->fs.sample_id_sys_index = index;
4429 index = alloc_system_value_index(emit, index);
4430 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
4431 VGPU10_OPERAND_TYPE_INPUT,
4432 VGPU10_OPERAND_INDEX_1D,
4433 index, 1,
4434 VGPU10_NAME_SAMPLE_INDEX,
4435 VGPU10_OPERAND_4_COMPONENT,
4436 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4437 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4438 VGPU10_INTERPOLATION_CONSTANT, TRUE,
4439 map_tgsi_semantic_to_sgn_name(semantic_name));
4440 break;
4441 case TGSI_SEMANTIC_SAMPLEPOS:
4442 /* This system value contains the position of the current sample
4443 * when using per-sample shading. We implement this by calling
4444 * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4445 * index as the argument. See emit_sample_position_instructions().
4446 */
4447 assert(emit->version >= 41);
4448 emit->fs.sample_pos_sys_index = index;
4449 index = alloc_system_value_index(emit, index);
4450 break;
4451 case TGSI_SEMANTIC_INVOCATIONID:
4452       /* Note: the invocation id input is mapped to a different register
4453        * depending on the shader type: in a GS it maps to vGSInstanceID#,
4454        * in a TCS to vOutputControlPointID#. In both cases the mapped name
4455        * is unique rather than just a generic input name ("v#"), so there
4456        * is no need to remap the index value.
4457        */
4459 assert(emit->unit == PIPE_SHADER_GEOMETRY ||
4460 emit->unit == PIPE_SHADER_TESS_CTRL);
4461 assert(emit->version >= 50);
4462
4463 if (emit->unit == PIPE_SHADER_GEOMETRY) {
4464 emit->gs.invocation_id_sys_index = index;
4465 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4466 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
4467 VGPU10_OPERAND_INDEX_0D,
4468 index, 1,
4469 VGPU10_NAME_UNDEFINED,
4470 VGPU10_OPERAND_0_COMPONENT,
4471 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4472 0,
4473 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4474 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4475 } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4476 /* The emission of the control point id will be done
4477 * in the control point phase in emit_hull_shader_control_point_phase().
4478 */
4479 emit->tcs.invocation_id_sys_index = index;
4480 }
4481 break;
4482 case TGSI_SEMANTIC_SAMPLEMASK:
4483 /* Note: the PS sample mask input has a unique name ("vCoverage#")
4484 * rather than just a generic input name ("v#") so no need to remap the
4485 * index value.
4486 */
4487 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4488 assert(emit->version >= 50);
4489 emit->fs.sample_mask_in_sys_index = index;
4490 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4491 VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
4492 VGPU10_OPERAND_INDEX_0D,
4493 index, 1,
4494 VGPU10_NAME_UNDEFINED,
4495 VGPU10_OPERAND_1_COMPONENT,
4496 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4497 0,
4498 VGPU10_INTERPOLATION_CONSTANT, TRUE,
4499 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4500 break;
4501 case TGSI_SEMANTIC_TESSCOORD:
4502 assert(emit->version >= 50);
4503
4504 unsigned usageMask = 0;
4505
4506 if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4507 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
4508 }
4509 else if (emit->tes.prim_mode == PIPE_PRIM_LINES ||
4510 emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4511 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
4512 }
4513
4514 emit->tes.tesscoord_sys_index = index;
4515 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4516 VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
4517 VGPU10_OPERAND_INDEX_0D,
4518 index, 1,
4519 VGPU10_NAME_UNDEFINED,
4520 VGPU10_OPERAND_4_COMPONENT,
4521 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4522 usageMask,
4523 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4524 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4525 break;
4526 case TGSI_SEMANTIC_TESSINNER:
4527 assert(emit->version >= 50);
4528 emit->tes.inner.tgsi_index = index;
4529 break;
4530 case TGSI_SEMANTIC_TESSOUTER:
4531 assert(emit->version >= 50);
4532 emit->tes.outer.tgsi_index = index;
4533 break;
4534 case TGSI_SEMANTIC_VERTICESIN:
4535 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
4536 assert(emit->version >= 50);
4537
4538 /* save the system value index */
4539 emit->tcs.vertices_per_patch_index = index;
4540 break;
4541 case TGSI_SEMANTIC_PRIMID:
4542 assert(emit->version >= 50);
4543 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4544 emit->tcs.prim_id_index = index;
4545 }
4546 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
4547 emit->tes.prim_id_index = index;
4548 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4549 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
4550 VGPU10_OPERAND_INDEX_0D,
4551 index, 1,
4552 VGPU10_NAME_UNDEFINED,
4553 VGPU10_OPERAND_0_COMPONENT,
4554 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4555 0,
4556 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4557 map_tgsi_semantic_to_sgn_name(semantic_name));
4558 }
4559 break;
4560 default:
4561 debug_printf("unexpected system value semantic index %u / %s\n",
4562 semantic_name, tgsi_semantic_names[semantic_name]);
4563 }
4564 }
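/*
 * Illustrative example (not from the original source): for
 * TGSI_SEMANTIC_INSTANCEID the system value is remapped through
 * alloc_system_value_index() and declared roughly as
 * "dcl_input_siv v<index>.x, instance_id", while values with their own
 * dedicated registers (e.g. the GS instance id or the PS coverage mask)
 * keep their original index.
 */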
4565
4566 /**
4567 * Translate a TGSI declaration to VGPU10.
4568 */
4569 static boolean
4570 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
4571 const struct tgsi_full_declaration *decl)
4572 {
4573 switch (decl->Declaration.File) {
4574 case TGSI_FILE_INPUT:
4575 /* do nothing - see emit_input_declarations() */
4576 return TRUE;
4577
4578 case TGSI_FILE_OUTPUT:
4579 assert(decl->Range.First == decl->Range.Last);
4580 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
4581 return TRUE;
4582
4583 case TGSI_FILE_TEMPORARY:
4584 /* Don't declare the temps here. Just keep track of how many
4585 * and emit the declaration later.
4586 */
4587 if (decl->Declaration.Array) {
4588 /* Indexed temporary array. Save the start index of the array
4589 * and the size of the array.
4590 */
4591 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
4592 assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
4593
4594 /* Save this array so we can emit the declaration for it later */
4595 create_temp_array(emit, arrayID, decl->Range.First,
4596 decl->Range.Last - decl->Range.First + 1,
4597 decl->Range.First);
4598 }
4599
4600 /* for all temps, indexed or not, keep track of highest index */
4601 emit->num_shader_temps = MAX2(emit->num_shader_temps,
4602 decl->Range.Last + 1);
4603 return TRUE;
4604
4605 case TGSI_FILE_CONSTANT:
4606 /* Don't declare constants here. Just keep track and emit later. */
4607 {
4608 unsigned constbuf = 0, num_consts;
4609 if (decl->Declaration.Dimension) {
4610 constbuf = decl->Dim.Index2D;
4611 }
4612          /* Assert here: a shader with an out-of-bounds constbuf index should
4613           * never have linked, so we should not have reached this point.
4614           */
4616 assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
4617
4618 num_consts = MAX2(emit->num_shader_consts[constbuf],
4619 decl->Range.Last + 1);
4620
4621 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
4622 debug_printf("Warning: constant buffer is declared to size [%u]"
4623 " but [%u] is the limit.\n",
4624 num_consts,
4625 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4626 }
4627 /* The linker doesn't enforce the max UBO size so we clamp here */
4628 emit->num_shader_consts[constbuf] =
4629 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4630 }
4631 return TRUE;
4632
4633 case TGSI_FILE_IMMEDIATE:
4634 assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4635 return FALSE;
4636
4637 case TGSI_FILE_SYSTEM_VALUE:
4638 emit_system_value_declaration(emit, decl->Semantic.Name,
4639 decl->Range.First);
4640 return TRUE;
4641
4642 case TGSI_FILE_SAMPLER:
4643 /* Don't declare samplers here. Just keep track and emit later. */
4644 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
4645 return TRUE;
4646
4647 #if 0
4648 case TGSI_FILE_RESOURCE:
4649 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
4650 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
4651 assert(!"TGSI_FILE_RESOURCE not handled yet");
4652 return FALSE;
4653 #endif
4654
4655 case TGSI_FILE_ADDRESS:
4656 emit->num_address_regs = MAX2(emit->num_address_regs,
4657 decl->Range.Last + 1);
4658 return TRUE;
4659
4660 case TGSI_FILE_SAMPLER_VIEW:
4661 {
4662 unsigned unit = decl->Range.First;
4663 assert(decl->Range.First == decl->Range.Last);
4664 emit->sampler_target[unit] = decl->SamplerView.Resource;
4665
4666 /* Note: we can ignore YZW return types for now */
4667 emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
4668 emit->sampler_view[unit] = TRUE;
4669 }
4670 return TRUE;
4671
4672 default:
4673 assert(!"Unexpected type of declaration");
4674 return FALSE;
4675 }
4676 }
4677
4678
4679
4680 /**
4681 * Emit input declarations for fragment shader.
4682 */
4683 static void
4684 emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
4685 {
4686 unsigned i;
4687
4688 for (i = 0; i < emit->linkage.num_inputs; i++) {
4689 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4690 unsigned usage_mask = emit->info.input_usage_mask[i];
4691 unsigned index = emit->linkage.input_map[i];
4692 unsigned type, interpolationMode, name;
4693 unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4694
4695 if (usage_mask == 0)
4696 continue; /* register is not actually used */
4697
4698 if (semantic_name == TGSI_SEMANTIC_POSITION) {
4699 /* fragment position input */
4700 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4701 interpolationMode = VGPU10_INTERPOLATION_LINEAR;
4702 name = VGPU10_NAME_POSITION;
4703 if (usage_mask & TGSI_WRITEMASK_W) {
4704 /* we need to replace use of 'w' with '1/w' */
4705 emit->fs.fragcoord_input_index = i;
4706 }
4707 }
4708 else if (semantic_name == TGSI_SEMANTIC_FACE) {
4709 /* fragment front-facing input */
4710 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4711 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4712 name = VGPU10_NAME_IS_FRONT_FACE;
4713 emit->fs.face_input_index = i;
4714 }
4715 else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4716 /* primitive ID */
4717 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4718 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4719 name = VGPU10_NAME_PRIMITIVE_ID;
4720 }
4721 else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
4722 /* sample index / ID */
4723 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4724 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4725 name = VGPU10_NAME_SAMPLE_INDEX;
4726 }
4727 else if (semantic_name == TGSI_SEMANTIC_LAYER) {
4728 /* render target array index */
4729 if (emit->key.fs.layer_to_zero) {
4730 /**
4731 * The shader from the previous stage does not write to layer,
4732 * so reading the layer index in fragment shader should return 0.
4733 */
4734 emit->fs.layer_input_index = i;
4735 continue;
4736 } else {
4737 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4738 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4739 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
4740 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4741 }
4742 }
4743 else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
4744 /* viewport index */
4745 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4746 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4747 name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
4748 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4749 }
4750 else {
4751 /* general fragment input */
4752 type = VGPU10_OPCODE_DCL_INPUT_PS;
4753 interpolationMode =
4754 translate_interpolation(emit,
4755 emit->info.input_interpolate[i],
4756 emit->info.input_interpolate_loc[i]);
4757
4758 /* keeps track if flat interpolation mode is being used */
4759 emit->uses_flat_interp = emit->uses_flat_interp ||
4760 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
4761
4762 name = VGPU10_NAME_UNDEFINED;
4763 }
4764
4765 emit_input_declaration(emit, type,
4766 VGPU10_OPERAND_TYPE_INPUT,
4767 VGPU10_OPERAND_INDEX_1D, index, 1,
4768 name,
4769 VGPU10_OPERAND_4_COMPONENT,
4770 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4771 mask,
4772 interpolationMode, TRUE,
4773 map_tgsi_semantic_to_sgn_name(semantic_name));
4774 }
4775 }
4776
4777
4778 /**
4779 * Emit input declarations for vertex shader.
4780 */
4781 static void
4782 emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
4783 {
4784 unsigned i;
4785
4786 for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
4787 unsigned usage_mask = emit->info.input_usage_mask[i];
4788 unsigned index = i;
4789
4790 if (usage_mask == 0)
4791 continue; /* register is not actually used */
4792
4793 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4794 VGPU10_OPERAND_TYPE_INPUT,
4795 VGPU10_OPERAND_INDEX_1D, index, 1,
4796 VGPU10_NAME_UNDEFINED,
4797 VGPU10_OPERAND_4_COMPONENT,
4798 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4799 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4800 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4801 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4802 }
4803 }
4804
4805
4806 /**
4807 * Emit input declarations for geometry shader.
4808 */
4809 static void
4810 emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
4811 {
4812 unsigned i;
4813
4814 for (i = 0; i < emit->info.num_inputs; i++) {
4815 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4816 unsigned usage_mask = emit->info.input_usage_mask[i];
4817 unsigned index = emit->linkage.input_map[i];
4818 unsigned opcodeType, operandType;
4819 unsigned numComp, selMode;
4820 unsigned name;
4821 unsigned dim;
4822
4823 if (usage_mask == 0)
4824 continue; /* register is not actually used */
4825
4826 opcodeType = VGPU10_OPCODE_DCL_INPUT;
4827 operandType = VGPU10_OPERAND_TYPE_INPUT;
4828 numComp = VGPU10_OPERAND_4_COMPONENT;
4829 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4830 name = VGPU10_NAME_UNDEFINED;
4831
4832 /* all geometry shader inputs are two dimensional except
4833 * gl_PrimitiveID
4834 */
4835 dim = VGPU10_OPERAND_INDEX_2D;
4836
4837 if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4838 /* Primitive ID */
4839 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
4840 dim = VGPU10_OPERAND_INDEX_0D;
4841 numComp = VGPU10_OPERAND_0_COMPONENT;
4842 selMode = 0;
4843
4844          /* Also save the register index so we can check for the primitive id
4845           * when emitting a src register; the operand type and index dimension
4846           * must be modified when emitting the primitive id src register.
4847           */
4848 emit->gs.prim_id_index = i;
4849 }
4850 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4851 /* vertex position input */
4852 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
4853 name = VGPU10_NAME_POSITION;
4854 }
4855
4856 emit_input_declaration(emit, opcodeType, operandType,
4857 dim, index,
4858 emit->gs.input_size,
4859 name,
4860 numComp, selMode,
4861 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4862 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4863 map_tgsi_semantic_to_sgn_name(semantic_name));
4864 }
4865 }
4866
4867
4868 /**
4869 * Emit input declarations for tessellation control shader.
4870 */
4871 static void
4872 emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
4873 {
4874 unsigned i;
4875 unsigned size = emit->key.tcs.vertices_per_patch;
4876 unsigned indicesMask = 0;
4877 boolean addSignature = TRUE;
4878
4879 if (!emit->tcs.control_point_phase)
4880 addSignature = emit->tcs.fork_phase_add_signature;
4881
4882 for (i = 0; i < emit->info.num_inputs; i++) {
4883 unsigned usage_mask = emit->info.input_usage_mask[i];
4884 unsigned index = emit->linkage.input_map[i];
4885 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4886 VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
4887 VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
4888 SVGA3dDXSignatureSemanticName sgn_name =
4889 map_tgsi_semantic_to_sgn_name(semantic_name);
4890
4891 /* indices that are declared */
4892 indicesMask |= 1 << index;
4893
4894 if (semantic_name == TGSI_SEMANTIC_POSITION ||
4895 index == emit->linkage.position_index) {
4896 /* save the input control point index for later use */
4897 emit->tcs.control_point_input_index = i;
4898 }
4899 else if (usage_mask == 0) {
4900 continue; /* register is not actually used */
4901 }
4902 else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
4903 /* The shadow copy is being used here. So set the signature name
4904 * to UNDEFINED.
4905 */
4906 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
4907 }
4908
4909 /* input control points in the patch constant phase are emitted in the
4910 * vicp register rather than the v register.
4911 */
4912 if (!emit->tcs.control_point_phase) {
4913 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
4914 }
4915
4916 /* Tessellation control shader inputs are two dimensional.
4917 * The array size is determined by the patch vertex count.
4918 */
4919 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4920 operandType,
4921 VGPU10_OPERAND_INDEX_2D,
4922 index, size, name,
4923 VGPU10_OPERAND_4_COMPONENT,
4924 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4925 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4926 VGPU10_INTERPOLATION_UNDEFINED,
4927 addSignature, sgn_name);
4928 }
4929
4930 if (emit->tcs.control_point_phase) {
4931 if (emit->tcs.control_point_input_index == INVALID_INDEX) {
4932
4933 /* Add input control point declaration if it does not exist */
4934 if ((indicesMask & (1 << emit->linkage.position_index)) == 0) {
4935 emit->linkage.input_map[emit->linkage.num_inputs] =
4936 emit->linkage.position_index;
4937 emit->tcs.control_point_input_index = emit->linkage.num_inputs++;
4938
4939 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4940 VGPU10_OPERAND_TYPE_INPUT,
4941 VGPU10_OPERAND_INDEX_2D,
4942 emit->linkage.position_index,
4943 emit->key.tcs.vertices_per_patch,
4944 VGPU10_NAME_UNDEFINED,
4945 VGPU10_OPERAND_4_COMPONENT,
4946 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4947 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4948 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4949 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4950 }
4951 }
4952
4953 /* Also add an address register for the indirection to the
4954 * input control points
4955 */
4956 emit->tcs.control_point_addr_index = emit->num_address_regs++;
4957 }
4958 }
4959
4960
4961 static void
4962 emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
4963 {
4964
4965 /* In tcs, tess factors are emitted as extra outputs.
4966 * The starting register index for the tess factors is captured
4967 * in the compile key.
4968 */
4969 unsigned inputIndex = emit->key.tes.tessfactor_index;
4970
4971 if (emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4972 if (emit->key.tes.need_tessouter) {
4973 emit->tes.outer.in_index = inputIndex;
4974 for (int i = 0; i < 4; i++) {
4975 emit_tesslevel_declaration(emit, inputIndex++,
4976 VGPU10_OPCODE_DCL_INPUT_SIV,
4977 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4978 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4979 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4980 }
4981 }
4982
4983 if (emit->key.tes.need_tessinner) {
4984 emit->tes.inner.in_index = inputIndex;
4985 emit_tesslevel_declaration(emit, inputIndex++,
4986 VGPU10_OPCODE_DCL_INPUT_SIV,
4987 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4988 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4989 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4990
4991 emit_tesslevel_declaration(emit, inputIndex++,
4992 VGPU10_OPCODE_DCL_INPUT_SIV,
4993 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4994 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4995 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4996 }
4997 }
4998 else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4999 if (emit->key.tes.need_tessouter) {
5000 emit->tes.outer.in_index = inputIndex;
5001 for (int i = 0; i < 3; i++) {
5002 emit_tesslevel_declaration(emit, inputIndex++,
5003 VGPU10_OPCODE_DCL_INPUT_SIV,
5004 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5005 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5006 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5007 }
5008 }
5009
5010 if (emit->key.tes.need_tessinner) {
5011 emit->tes.inner.in_index = inputIndex;
5012 emit_tesslevel_declaration(emit, inputIndex++,
5013 VGPU10_OPCODE_DCL_INPUT_SIV,
5014 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5015 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5016 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5017 }
5018 }
5019 else if (emit->tes.prim_mode == PIPE_PRIM_LINES) {
5020 if (emit->key.tes.need_tessouter) {
5021 emit->tes.outer.in_index = inputIndex;
5022 emit_tesslevel_declaration(emit, inputIndex++,
5023 VGPU10_OPCODE_DCL_INPUT_SIV,
5024 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5025 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5026 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5027
5028 emit_tesslevel_declaration(emit, inputIndex++,
5029 VGPU10_OPCODE_DCL_INPUT_SIV,
5030 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5031 VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5032 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5033 }
5034 }
5035 }
5036
5037
5038 /**
5039 * Emit input declarations for tessellation evaluation shader.
5040 */
5041 static void
5042 emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
5043 {
5044 unsigned i;
5045
5046 for (i = 0; i < emit->info.num_inputs; i++) {
5047 unsigned usage_mask = emit->info.input_usage_mask[i];
5048 unsigned index = emit->linkage.input_map[i];
5049 unsigned size;
5050 const enum tgsi_semantic semantic_name =
5051 emit->info.input_semantic_name[i];
5052 SVGA3dDXSignatureSemanticName sgn_name;
5053 VGPU10_OPERAND_TYPE operandType;
5054 VGPU10_OPERAND_INDEX_DIMENSION dim;
5055
5056 if (usage_mask == 0)
5057 usage_mask = 1; /* at least set usage mask to one */
5058
5059 if (semantic_name == TGSI_SEMANTIC_PATCH) {
5060 operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
5061 dim = VGPU10_OPERAND_INDEX_1D;
5062 size = 1;
5063 sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
5064 }
5065 else {
5066 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5067 dim = VGPU10_OPERAND_INDEX_2D;
5068 size = emit->key.tes.vertices_per_patch;
5069 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5070 }
5071
5072 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
5073 dim, index, size, VGPU10_NAME_UNDEFINED,
5074 VGPU10_OPERAND_4_COMPONENT,
5075 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5076 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5077 VGPU10_INTERPOLATION_UNDEFINED,
5078 TRUE, sgn_name);
5079 }
5080
5081 emit_tessfactor_input_declarations(emit);
5082
5083 /* DX spec requires DS input controlpoint/patch-constant signatures to match
5084 * the HS output controlpoint/patch-constant signatures exactly.
5085 * Add missing input declarations even if they are not used in the shader.
5086 */
5087 if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
5088 struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
5089 for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
5090
5091 /* If a tcs output does not have a corresponding input register in
5092 * tes, add one.
5093 */
5094 if (emit->linkage.prevShader.output_map[i] >
5095 emit->linkage.input_map_max) {
5096 const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
5097
5098 if (sem_name == TGSI_SEMANTIC_PATCH) {
5099 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5100 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5101 VGPU10_OPERAND_INDEX_1D,
5102 i, 1, VGPU10_NAME_UNDEFINED,
5103 VGPU10_OPERAND_4_COMPONENT,
5104 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5105 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5106 VGPU10_INTERPOLATION_UNDEFINED,
5107 TRUE,
5108 map_tgsi_semantic_to_sgn_name(sem_name));
5109
5110 } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
5111 sem_name != TGSI_SEMANTIC_TESSOUTER) {
5112 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5113 VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
5114 VGPU10_OPERAND_INDEX_2D,
5115 i, emit->key.tes.vertices_per_patch,
5116 VGPU10_NAME_UNDEFINED,
5117 VGPU10_OPERAND_4_COMPONENT,
5118 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5119 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5120 VGPU10_INTERPOLATION_UNDEFINED,
5121 TRUE,
5122 map_tgsi_semantic_to_sgn_name(sem_name));
5123 }
5124 /* tessellation factors are taken care of in
5125 * emit_tessfactor_input_declarations().
5126 */
5127 }
5128 }
5129 }
5130 }
5131
5132
5133 /**
5134 * Emit all input declarations.
5135 */
5136 static boolean
5137 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5138 {
5139 emit->index_range.required =
5140 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
5141
5142 switch (emit->unit) {
5143 case PIPE_SHADER_FRAGMENT:
5144 emit_fs_input_declarations(emit);
5145 break;
5146 case PIPE_SHADER_GEOMETRY:
5147 emit_gs_input_declarations(emit);
5148 break;
5149 case PIPE_SHADER_VERTEX:
5150 emit_vs_input_declarations(emit);
5151 break;
5152 case PIPE_SHADER_TESS_CTRL:
5153 emit_tcs_input_declarations(emit);
5154 break;
5155 case PIPE_SHADER_TESS_EVAL:
5156 emit_tes_input_declarations(emit);
5157 break;
5158 case PIPE_SHADER_COMPUTE:
5159 //XXX emit_cs_input_declarations(emit);
5160 break;
5161 default:
5162 assert(0);
5163 }
5164
5165 if (emit->index_range.start_index != INVALID_INDEX) {
5166 emit_index_range_declaration(emit);
5167 }
5168 emit->index_range.required = FALSE;
5169 return TRUE;
5170 }
5171
5172
5173 /**
5174 * Emit all output declarations.
5175 */
5176 static boolean
5177 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5178 {
5179 emit->index_range.required =
5180 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
5181
5182 switch (emit->unit) {
5183 case PIPE_SHADER_FRAGMENT:
5184 emit_fs_output_declarations(emit);
5185 break;
5186 case PIPE_SHADER_GEOMETRY:
5187 emit_gs_output_declarations(emit);
5188 break;
5189 case PIPE_SHADER_VERTEX:
5190 emit_vs_output_declarations(emit);
5191 break;
5192 case PIPE_SHADER_TESS_CTRL:
5193 emit_tcs_output_declarations(emit);
5194 break;
5195 case PIPE_SHADER_TESS_EVAL:
5196 emit_tes_output_declarations(emit);
5197 break;
5198 case PIPE_SHADER_COMPUTE:
5199 //XXX emit_cs_output_declarations(emit);
5200 break;
5201 default:
5202 assert(0);
5203 }
5204
5205 if (emit->vposition.so_index != INVALID_INDEX &&
5206 emit->vposition.out_index != INVALID_INDEX) {
5207
5208 assert(emit->unit != PIPE_SHADER_FRAGMENT);
5209
5210 /* Emit the declaration for the non-adjusted vertex position
5211 * for stream output purposes.
5212 */
5213 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5214 emit->vposition.so_index,
5215 VGPU10_NAME_UNDEFINED,
5216 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5217 TRUE,
5218 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5219 }
5220
5221 if (emit->clip_dist_so_index != INVALID_INDEX &&
5222 emit->clip_dist_out_index != INVALID_INDEX) {
5223
5224 assert(emit->unit != PIPE_SHADER_FRAGMENT);
5225
5226 /* Emit the declaration for the clip distance shadow copy which
5227 * will be used for stream output purposes and for the clip distance
5228 * varying variable.
5229 */
5230 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5231 emit->clip_dist_so_index,
5232 VGPU10_NAME_UNDEFINED,
5233 emit->output_usage_mask[emit->clip_dist_out_index],
5234 TRUE,
5235 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5236
5237 if (emit->info.num_written_clipdistance > 4) {
5238 /* for the second clip distance register, each handles 4 planes */
5239 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5240 emit->clip_dist_so_index + 1,
5241 VGPU10_NAME_UNDEFINED,
5242 emit->output_usage_mask[emit->clip_dist_out_index+1],
5243 TRUE,
5244 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5245 }
5246 }
5247
5248 if (emit->index_range.start_index != INVALID_INDEX) {
5249 emit_index_range_declaration(emit);
5250 }
5251 emit->index_range.required = FALSE;
5252 return TRUE;
5253 }
5254
5255
5256 /**
5257 * A helper function to create a temporary indexable array
5258 * and initialize the corresponding entries in the temp_map array.
5259 */
5260 static void
5261 create_temp_array(struct svga_shader_emitter_v10 *emit,
5262 unsigned arrayID, unsigned first, unsigned count,
5263 unsigned startIndex)
5264 {
5265 unsigned i, tempIndex = startIndex;
5266
5267 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5268 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5269 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5270
5271 emit->temp_arrays[arrayID].start = first;
5272 emit->temp_arrays[arrayID].size = count;
5273
5274 /* Fill in the temp_map entries for this temp array */
5275 for (i = 0; i < count; i++, tempIndex++) {
5276 emit->temp_map[tempIndex].arrayId = arrayID;
5277 emit->temp_map[tempIndex].index = i;
5278 }
5279 }
5280
5281
5282 /**
5283 * Emit the declaration for the temporary registers.
5284 */
5285 static boolean
5286 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
5287 {
5288 unsigned total_temps, reg, i;
5289
5290 total_temps = emit->num_shader_temps;
5291
5292 /* If there is indirect access to non-indexable temps in the shader,
5293 * convert those temps to indexable temps. This works around a bug
5294 * in the GLSL->TGSI translator exposed in piglit test
5295 * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
5296 * Internal temps added by the driver remain as non-indexable temps.
5297 */
5298 if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
5299 emit->num_temp_arrays == 0) {
5300 create_temp_array(emit, 1, 0, total_temps, 0);
5301 }
5302
5303 /* Allocate extra temps for specially-implemented instructions,
5304 * such as LIT.
5305 */
5306 total_temps += MAX_INTERNAL_TEMPS;
5307
5308 /* Allocate extra temps for clip distance or clip vertex.
5309 */
5310 if (emit->clip_mode == CLIP_DISTANCE) {
5311 /* We need to write the clip distance to a temporary register
5312 * first. Then it will be copied to the shadow copy for
5313 * the clip distance varying variable and for stream output purposes.
5314 * It will also be copied to the actual CLIPDIST register
5315 * according to the enabled clip planes.
5316 */
5317 emit->clip_dist_tmp_index = total_temps++;
5318 if (emit->info.num_written_clipdistance > 4)
5319 total_temps++; /* second clip register */
5320 }
5321 else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
5322 /* If the current shader is in the last vertex processing stage,
5323 * We need to convert the TGSI CLIPVERTEX output to one or more
5324 * clip distances. Allocate a temp reg for the clipvertex here.
5325 */
5326 assert(emit->info.writes_clipvertex > 0);
5327 emit->clip_vertex_tmp_index = total_temps;
5328 total_temps++;
5329 }
5330
5331 if (emit->info.uses_vertexid) {
5332 assert(emit->unit == PIPE_SHADER_VERTEX);
5333 emit->vs.vertex_id_tmp_index = total_temps++;
5334 }
5335
5336 if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
5337 if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
5338 emit->key.clip_plane_enable ||
5339 emit->vposition.so_index != INVALID_INDEX) {
5340 emit->vposition.tmp_index = total_temps;
5341 total_temps += 1;
5342 }
5343
5344 if (emit->vposition.need_prescale) {
5345 emit->vposition.prescale_scale_index = total_temps++;
5346 emit->vposition.prescale_trans_index = total_temps++;
5347 }
5348
5349 if (emit->unit == PIPE_SHADER_VERTEX) {
5350 unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
5351 emit->key.vs.adjust_attrib_itof |
5352 emit->key.vs.adjust_attrib_utof |
5353 emit->key.vs.attrib_is_bgra |
5354 emit->key.vs.attrib_puint_to_snorm |
5355 emit->key.vs.attrib_puint_to_uscaled |
5356 emit->key.vs.attrib_puint_to_sscaled);
5357 while (attrib_mask) {
5358 unsigned index = u_bit_scan(&attrib_mask);
5359 emit->vs.adjusted_input[index] = total_temps++;
5360 }
5361 }
5362 else if (emit->unit == PIPE_SHADER_GEOMETRY) {
5363 if (emit->key.gs.writes_viewport_index)
5364 emit->gs.viewport_index_tmp_index = total_temps++;
5365 }
5366 }
5367 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
5368 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
5369 emit->key.fs.write_color0_to_n_cbufs > 1) {
5370 /* Allocate a temp to hold the output color */
5371 emit->fs.color_tmp_index = total_temps;
5372 total_temps += 1;
5373 }
5374
5375 if (emit->fs.face_input_index != INVALID_INDEX) {
5376 /* Allocate a temp for the +/-1 face register */
5377 emit->fs.face_tmp_index = total_temps;
5378 total_temps += 1;
5379 }
5380
5381 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
5382 /* Allocate a temp for modified fragment position register */
5383 emit->fs.fragcoord_tmp_index = total_temps;
5384 total_temps += 1;
5385 }
5386
5387 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
5388 /* Allocate a temp for the sample position */
5389 emit->fs.sample_pos_tmp_index = total_temps++;
5390 }
5391 }
5392 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
5393 if (emit->vposition.need_prescale) {
5394 emit->vposition.tmp_index = total_temps++;
5395 emit->vposition.prescale_scale_index = total_temps++;
5396 emit->vposition.prescale_trans_index = total_temps++;
5397 }
5398
5399 if (emit->tes.inner.tgsi_index) {
5400 emit->tes.inner.temp_index = total_temps;
5401 total_temps += 1;
5402 }
5403
5404 if (emit->tes.outer.tgsi_index) {
5405 emit->tes.outer.temp_index = total_temps;
5406 total_temps += 1;
5407 }
5408 }
5409 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
5410 if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
5411 if (!emit->tcs.control_point_phase) {
5412 emit->tcs.inner.temp_index = total_temps;
5413 total_temps += 1;
5414 }
5415 }
5416 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
5417 if (!emit->tcs.control_point_phase) {
5418 emit->tcs.outer.temp_index = total_temps;
5419 total_temps += 1;
5420 }
5421 }
5422
5423 if (emit->tcs.control_point_phase &&
5424 emit->info.reads_pervertex_outputs) {
5425 emit->tcs.control_point_tmp_index = total_temps;
5426 total_temps += emit->tcs.control_point_out_count;
5427 }
5428 else if (!emit->tcs.control_point_phase &&
5429 emit->info.reads_perpatch_outputs) {
5430
5431 /* If there is indirect access to the patch constant outputs
5432 * in the control point phase, then an indexable temporary array
5433 * will be created for these patch constant outputs.
5434 * Note, indirect access can only be applicable to
5435 * patch constant outputs in the control point phase.
5436 */
5437 if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
5438 unsigned arrayID =
5439 emit->num_temp_arrays ? emit->num_temp_arrays : 1;
5440 create_temp_array(emit, arrayID, 0,
5441 emit->tcs.patch_generic_out_count, total_temps);
5442 }
5443 emit->tcs.patch_generic_tmp_index = total_temps;
5444 total_temps += emit->tcs.patch_generic_out_count;
5445 }
5446
5447 emit->tcs.invocation_id_tmp_index = total_temps++;
5448 }
5449
5450 for (i = 0; i < emit->num_address_regs; i++) {
5451 emit->address_reg_index[i] = total_temps++;
5452 }
5453
5454 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
5455 * temp indexes. Basically, we compact all the non-array temp register
5456 * indexes into a consecutive series.
5457 *
5458 * Before, we may have some TGSI declarations like:
5459 * DCL TEMP[0..1], LOCAL
5460 * DCL TEMP[2..4], ARRAY(1), LOCAL
5461 * DCL TEMP[5..7], ARRAY(2), LOCAL
5462 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things
5463 *
5464 * After, we'll have a map like this:
5465 * temp_map[0] = { array 0, index 0 }
5466 * temp_map[1] = { array 0, index 1 }
5467 * temp_map[2] = { array 1, index 0 }
5468 * temp_map[3] = { array 1, index 1 }
5469 * temp_map[4] = { array 1, index 2 }
5470 * temp_map[5] = { array 2, index 0 }
5471 * temp_map[6] = { array 2, index 1 }
5472 * temp_map[7] = { array 2, index 2 }
5473 * temp_map[8] = { array 0, index 2 }
5474 * temp_map[9] = { array 0, index 3 }
5475 *
5476 * We'll declare two arrays of 3 elements, plus a set of four non-indexed
5477 * temps numbered 0..3
5478 *
5479 * Any time we emit a temporary register index, we'll have to use the
5480 * temp_map[] table to convert the TGSI index to the VGPU10 index.
5481 *
5482 * Finally, we recompute the total_temps value here.
5483 */
5484 reg = 0;
5485 for (i = 0; i < total_temps; i++) {
5486 if (emit->temp_map[i].arrayId == 0) {
5487 emit->temp_map[i].index = reg++;
5488 }
5489 }
5490
5491 if (0) {
5492 debug_printf("total_temps %u\n", total_temps);
5493 for (i = 0; i < total_temps; i++) {
5494 debug_printf("temp %u -> array %u index %u\n",
5495 i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
5496 }
5497 }
5498
5499 total_temps = reg;
5500
5501 /* Emit declaration of ordinary temp registers */
5502 if (total_temps > 0) {
5503 VGPU10OpcodeToken0 opcode0;
5504
5505 opcode0.value = 0;
5506 opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
5507
5508 begin_emit_instruction(emit);
5509 emit_dword(emit, opcode0.value);
5510 emit_dword(emit, total_temps);
5511 end_emit_instruction(emit);
5512 }
5513
5514 /* Emit declarations for indexable temp arrays. Skip 0th entry since
5515 * it's unused.
5516 */
5517 for (i = 1; i < emit->num_temp_arrays; i++) {
5518 unsigned num_temps = emit->temp_arrays[i].size;
5519
5520 if (num_temps > 0) {
5521 VGPU10OpcodeToken0 opcode0;
5522
5523 opcode0.value = 0;
5524 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
5525
5526 begin_emit_instruction(emit);
5527 emit_dword(emit, opcode0.value);
5528 emit_dword(emit, i); /* which array */
5529 emit_dword(emit, num_temps);
5530 emit_dword(emit, 4); /* num components */
5531 end_emit_instruction(emit);
5532
5533 total_temps += num_temps;
5534 }
5535 }
5536
5537 /* Check that the grand total of all regular and indexed temps is
5538 * under the limit.
5539 */
5540 check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
5541
5542 return TRUE;
5543 }
5544
5545
5546 static boolean
5547 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
5548 {
5549 VGPU10OpcodeToken0 opcode0;
5550 VGPU10OperandToken0 operand0;
5551 unsigned total_consts, i;
5552
5553 opcode0.value = 0;
5554 opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
5555 opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
5556 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
5557
5558 operand0.value = 0;
5559 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
5560 operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
5561 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5562 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5563 operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
5564 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
5565 operand0.swizzleX = 0;
5566 operand0.swizzleY = 1;
5567 operand0.swizzleZ = 2;
5568 operand0.swizzleW = 3;
5569
5570 /**
5571 * Emit declaration for constant buffer [0]. We also allocate
5572 * room for the extra constants here.
5573 */
5574 total_consts = emit->num_shader_consts[0];
5575
5576 /* Now, allocate constant slots for the "extra" constants.
5577 * Note: it's critical that these extra constant locations
5578 * exactly match what's emitted by the "extra" constants code
5579 * in svga_state_constants.c
5580 */
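/* A rough sketch of the resulting layout (it simply mirrors the
 * allocations below): the extra slots follow the shader's own constants
 * in this order: prescale scale/translation pairs, the undo-viewport
 * constant (VS only), the vertex id bias (VS only), one constant per
 * enabled user clip plane, then per-sampler texcoord scale and texture
 * buffer size constants.
 */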
5581
5582 /* Vertex position scale/translation */
5583 if (emit->vposition.need_prescale) {
5584 emit->vposition.prescale_cbuf_index = total_consts;
5585 total_consts += (2 * emit->vposition.num_prescale);
5586 }
5587
5588 if (emit->unit == PIPE_SHADER_VERTEX) {
5589 if (emit->key.vs.undo_viewport) {
5590 emit->vs.viewport_index = total_consts++;
5591 }
5592 if (emit->key.vs.need_vertex_id_bias) {
5593 emit->vs.vertex_id_bias_index = total_consts++;
5594 }
5595 }
5596
5597 /* user-defined clip planes */
5598 if (emit->key.clip_plane_enable) {
5599 unsigned n = util_bitcount(emit->key.clip_plane_enable);
5600 assert(emit->unit != PIPE_SHADER_FRAGMENT &&
5601 emit->unit != PIPE_SHADER_COMPUTE);
5602 for (i = 0; i < n; i++) {
5603 emit->clip_plane_const[i] = total_consts++;
5604 }
5605 }
5606
5607 for (i = 0; i < emit->num_samplers; i++) {
5608
5609 if (emit->key.tex[i].sampler_view) {
5610 /* Texcoord scale factors for RECT textures */
5611 if (emit->key.tex[i].unnormalized) {
5612 emit->texcoord_scale_index[i] = total_consts++;
5613 }
5614
5615 /* Texture buffer sizes */
5616 if (emit->key.tex[i].target == PIPE_BUFFER) {
5617 emit->texture_buffer_size_index[i] = total_consts++;
5618 }
5619 }
5620 }
5621
5622 if (total_consts > 0) {
5623 begin_emit_instruction(emit);
5624 emit_dword(emit, opcode0.value);
5625 emit_dword(emit, operand0.value);
5626 emit_dword(emit, 0); /* which const buffer slot */
5627 emit_dword(emit, total_consts);
5628 end_emit_instruction(emit);
5629 }
5630
5631 /* Declare remaining constant buffers (UBOs) */
5632 for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
5633 if (emit->num_shader_consts[i] > 0) {
5634 begin_emit_instruction(emit);
5635 emit_dword(emit, opcode0.value);
5636 emit_dword(emit, operand0.value);
5637 emit_dword(emit, i); /* which const buffer slot */
5638 emit_dword(emit, emit->num_shader_consts[i]);
5639 end_emit_instruction(emit);
5640 }
5641 }
5642
5643 return TRUE;
5644 }
5645
5646
5647 /**
5648 * Emit declarations for samplers.
5649 */
5650 static boolean
5651 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
5652 {
5653 unsigned i;
5654
5655 for (i = 0; i < emit->num_samplers; i++) {
5656 VGPU10OpcodeToken0 opcode0;
5657 VGPU10OperandToken0 operand0;
5658
5659 opcode0.value = 0;
5660 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
5661 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
5662
5663 operand0.value = 0;
5664 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5665 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
5666 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5667 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5668
5669 begin_emit_instruction(emit);
5670 emit_dword(emit, opcode0.value);
5671 emit_dword(emit, operand0.value);
5672 emit_dword(emit, i);
5673 end_emit_instruction(emit);
5674 }
5675
5676 return TRUE;
5677 }
5678
5679
5680 /**
5681 * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
5682 */
5683 static unsigned
5684 pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
5685 unsigned num_samples,
5686 boolean is_array)
5687 {
5688 switch (target) {
5689 case PIPE_BUFFER:
5690 return VGPU10_RESOURCE_DIMENSION_BUFFER;
5691 case PIPE_TEXTURE_1D:
5692 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5693 case PIPE_TEXTURE_2D:
5694 return num_samples > 2 ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS :
5695 VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5696 case PIPE_TEXTURE_RECT:
5697 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5698 case PIPE_TEXTURE_3D:
5699 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
5700 case PIPE_TEXTURE_CUBE:
5701 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5702 case PIPE_TEXTURE_1D_ARRAY:
5703 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
5704 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5705 case PIPE_TEXTURE_2D_ARRAY:
5706 if (num_samples > 2 && is_array)
5707 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY;
5708 else if (is_array)
5709 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
5710 else
5711 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5712 case PIPE_TEXTURE_CUBE_ARRAY:
5713 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
5714 VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5715 default:
5716 assert(!"Unexpected resource type");
5717 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5718 }
5719 }
5720
5721
5722 /**
5723 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
5724 */
5725 static unsigned
5726 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
5727 unsigned num_samples,
5728 boolean is_array)
5729 {
5730 if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
5731 target = TGSI_TEXTURE_2D;
5732 }
5733 else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
5734 target = TGSI_TEXTURE_2D_ARRAY;
5735 }
5736
5737 switch (target) {
5738 case TGSI_TEXTURE_BUFFER:
5739 return VGPU10_RESOURCE_DIMENSION_BUFFER;
5740 case TGSI_TEXTURE_1D:
5741 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5742 case TGSI_TEXTURE_2D:
5743 case TGSI_TEXTURE_RECT:
5744 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5745 case TGSI_TEXTURE_3D:
5746 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
5747 case TGSI_TEXTURE_CUBE:
5748 case TGSI_TEXTURE_SHADOWCUBE:
5749 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5750 case TGSI_TEXTURE_SHADOW1D:
5751 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5752 case TGSI_TEXTURE_SHADOW2D:
5753 case TGSI_TEXTURE_SHADOWRECT:
5754 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5755 case TGSI_TEXTURE_1D_ARRAY:
5756 case TGSI_TEXTURE_SHADOW1D_ARRAY:
5757 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
5758 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5759 case TGSI_TEXTURE_2D_ARRAY:
5760 case TGSI_TEXTURE_SHADOW2D_ARRAY:
5761 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
5762 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5763 case TGSI_TEXTURE_2D_MSAA:
5764 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5765 case TGSI_TEXTURE_2D_ARRAY_MSAA:
5766 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
5767 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5768 case TGSI_TEXTURE_CUBE_ARRAY:
5769 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
5770 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
5771 : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5772 default:
5773 assert(!"Unexpected resource type");
5774 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5775 }
5776 }
5777
5778
5779 /**
5780 * Given a tgsi_return_type, return true iff it is an integer type.
5781 */
5782 static boolean
5783 is_integer_type(enum tgsi_return_type type)
5784 {
5785 switch (type) {
5786 case TGSI_RETURN_TYPE_SINT:
5787 case TGSI_RETURN_TYPE_UINT:
5788 return TRUE;
5789 case TGSI_RETURN_TYPE_FLOAT:
5790 case TGSI_RETURN_TYPE_UNORM:
5791 case TGSI_RETURN_TYPE_SNORM:
5792 return FALSE;
5793 case TGSI_RETURN_TYPE_COUNT:
5794 default:
5795 assert(!"is_integer_type: Unknown tgsi_return_type");
5796 return FALSE;
5797 }
5798 }
5799
5800
5801 /**
5802 * Emit declarations for resources.
5803 * XXX When we're sure that all TGSI shaders will be generated with
5804 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
5805 * rework this code.
5806 */
5807 static boolean
5808 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
5809 {
5810 unsigned i;
5811
5812 /* Emit resource decl for each sampler */
5813 for (i = 0; i < emit->num_samplers; i++) {
5814 VGPU10OpcodeToken0 opcode0;
5815 VGPU10OperandToken0 operand0;
5816 VGPU10ResourceReturnTypeToken return_type;
5817 VGPU10_RESOURCE_RETURN_TYPE rt;
5818
5819 opcode0.value = 0;
5820 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
5821 if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
5822 opcode0.resourceDimension =
5823 tgsi_texture_to_resource_dimension(emit->sampler_target[i],
5824 emit->key.tex[i].num_samples,
5825 emit->key.tex[i].is_array);
5826 }
5827 else {
5828 opcode0.resourceDimension =
5829 pipe_texture_to_resource_dimension(emit->key.tex[i].target,
5830 emit->key.tex[i].num_samples,
5831 emit->key.tex[i].is_array);
5832 }
5833 opcode0.sampleCount = emit->key.tex[i].num_samples;
5834 operand0.value = 0;
5835 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5836 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5837 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5838 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5839
5840 #if 1
5841 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
5842 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
5843 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
5844 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
5845 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
5846 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
5847 assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
5848 if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
5849 rt = emit->sampler_return_type[i] + 1;
5850 }
5851 else {
5852 rt = emit->key.tex[i].sampler_return_type;
5853 }
5854 #else
5855 switch (emit->sampler_return_type[i]) {
5856 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
5857 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
5858 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break;
5859 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break;
5860 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
5861 case TGSI_RETURN_TYPE_COUNT:
5862 default:
5863 rt = VGPU10_RETURN_TYPE_FLOAT;
5864 assert(!"emit_resource_declarations: Unknown tgsi_return_type");
5865 }
5866 #endif
5867
5868 return_type.value = 0;
5869 return_type.component0 = rt;
5870 return_type.component1 = rt;
5871 return_type.component2 = rt;
5872 return_type.component3 = rt;
5873
5874 begin_emit_instruction(emit);
5875 emit_dword(emit, opcode0.value);
5876 emit_dword(emit, operand0.value);
5877 emit_dword(emit, i);
5878 emit_dword(emit, return_type.value);
5879 end_emit_instruction(emit);
5880 }
5881
5882 return TRUE;
5883 }
5884
5885 /**
5886 * Emit instruction with n=1, 2 or 3 source registers.
5887 */
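/* Usage sketch (hypothetical call; dst/a/b are assumed to be registers
 * built with the make_*_reg helpers):
 *   emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst, &a, &b, NULL,
 *                        FALSE, FALSE);
 * emits "ADD dst, a, b" with neither the saturate nor the precise flag set.
 */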
5888 static void
5889 emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
5890 unsigned opcode,
5891 const struct tgsi_full_dst_register *dst,
5892 const struct tgsi_full_src_register *src1,
5893 const struct tgsi_full_src_register *src2,
5894 const struct tgsi_full_src_register *src3,
5895 boolean saturate, bool precise)
5896 {
5897 begin_emit_instruction(emit);
5898 emit_opcode_precise(emit, opcode, saturate, precise);
5899 emit_dst_register(emit, dst);
5900 emit_src_register(emit, src1);
5901 if (src2) {
5902 emit_src_register(emit, src2);
5903 }
5904 if (src3) {
5905 emit_src_register(emit, src3);
5906 }
5907 end_emit_instruction(emit);
5908 }
5909
5910 static void
5911 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
5912 unsigned opcode,
5913 const struct tgsi_full_dst_register *dst,
5914 const struct tgsi_full_src_register *src)
5915 {
5916 emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE);
5917 }
5918
5919 static void
5920 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
5921 VGPU10_OPCODE_TYPE opcode,
5922 const struct tgsi_full_dst_register *dst,
5923 const struct tgsi_full_src_register *src1,
5924 const struct tgsi_full_src_register *src2)
5925 {
5926 emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE);
5927 }
5928
5929 static void
5930 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
5931 VGPU10_OPCODE_TYPE opcode,
5932 const struct tgsi_full_dst_register *dst,
5933 const struct tgsi_full_src_register *src1,
5934 const struct tgsi_full_src_register *src2,
5935 const struct tgsi_full_src_register *src3)
5936 {
5937 emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE);
5938 }
5939
5940 static void
5941 emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
5942 VGPU10_OPCODE_TYPE opcode)
5943 {
5944 begin_emit_instruction(emit);
5945 emit_opcode(emit, opcode, FALSE);
5946 end_emit_instruction(emit);
5947 }
5948
5949 /**
5950 * Tessellation inner/outer levels need to be stored into their
5951 * appropriate registers depending on prim_mode.
5952 */
5953 static void
5954 store_tesslevels(struct svga_shader_emitter_v10 *emit)
5955 {
5956 int i;
5957
5958 /* Tessellation levels are required input/output in the hull shader.
5959 * Emit the inner/outer tessellation levels, either from the
5960 * values provided in the tcs or from the fallback default value of 1.0.
5961 */
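/* For example (a sketch of what the quad path below produces when the tcs
 * provides no levels): "MOV o[inner+i].x, 1.0" for i in [0,1] and
 * "MOV o[outer+i].x, 1.0" for i in [0,3], where inner/outer are
 * emit->tcs.inner.out_index and emit->tcs.outer.out_index.
 */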
5962 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
5963 struct tgsi_full_src_register temp_src;
5964
5965 if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5966 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5967 else
5968 temp_src = make_immediate_reg_float(emit, 1.0f);
5969
5970 for (i = 0; i < 2; i++) {
5971 struct tgsi_full_src_register src =
5972 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5973 struct tgsi_full_dst_register dst =
5974 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
5975 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5976 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5977 }
5978
5979 if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
5980 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
5981 else
5982 temp_src = make_immediate_reg_float(emit, 1.0f);
5983
5984 for (i = 0; i < 4; i++) {
5985 struct tgsi_full_src_register src =
5986 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5987 struct tgsi_full_dst_register dst =
5988 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
5989 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5990 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5991 }
5992 }
5993 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
5994 struct tgsi_full_src_register temp_src;
5995
5996 if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5997 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5998 else
5999 temp_src = make_immediate_reg_float(emit, 1.0f);
6000
6001 struct tgsi_full_src_register src =
6002 scalar_src(&temp_src, TGSI_SWIZZLE_X);
6003 struct tgsi_full_dst_register dst =
6004 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
6005 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6006 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6007
6008 if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6009 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6010 else
6011 temp_src = make_immediate_reg_float(emit, 1.0f);
6012
6013 for (i = 0; i < 3; i++) {
6014 struct tgsi_full_src_register src =
6015 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6016 struct tgsi_full_dst_register dst =
6017 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6018 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6019 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6020 }
6021 }
6022 else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
6023 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
6024 struct tgsi_full_src_register temp_src =
6025 make_src_temp_reg(emit->tcs.outer.temp_index);
6026 for (i = 0; i < 2; i++) {
6027 struct tgsi_full_src_register src =
6028 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6029 struct tgsi_full_dst_register dst =
6030 make_dst_reg(TGSI_FILE_OUTPUT,
6031 emit->tcs.outer.out_index + i);
6032 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6033 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6034 }
6035 }
6036 }
6037 else {
6038 debug_printf("Unsupported primitive type");
6039 }
6040 }
6041
6042
6043 /**
6044 * Emit the actual clip distance instructions to be used for clipping
6045 * by copying the clip distance from the temporary registers to the
6046 * CLIPDIST registers written with the enabled planes mask.
6047 * Also copy the clip distance from the temporary to the clip distance
6048 * shadow copy register which will be referenced by the input shader
6049 */
6050 static void
6051 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
6052 {
6053 struct tgsi_full_src_register tmp_clip_dist_src;
6054 struct tgsi_full_dst_register clip_dist_dst;
6055
6056 unsigned i;
6057 unsigned clip_plane_enable = emit->key.clip_plane_enable;
6058 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6059 int num_written_clipdist = emit->info.num_written_clipdistance;
6060
6061 assert(emit->clip_dist_out_index != INVALID_INDEX);
6062 assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6063
6064 /**
6065 * Temporarily reset the temporary clip dist register index so
6066 * that the copy to the real clip dist register will not
6067 * attempt to copy to the temporary register again.
6068 */
6069 emit->clip_dist_tmp_index = INVALID_INDEX;
6070
6071 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6072
6073 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6074
6075 /**
6076 * copy to the shadow copy for use by the varying variable and
6077 * stream output. All clip distances
6078 * will be written regardless of the enabled clipping planes.
6079 */
6080 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6081 emit->clip_dist_so_index + i);
6082
6083 /* MOV clip_dist_so, tmp_clip_dist */
6084 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6085 &tmp_clip_dist_src);
6086
6087 /**
6088 * copy the clip distances for the enabled clipping planes
6089 * to the CLIPDIST registers used for clipping
6090 */
6091 if (clip_plane_enable & 0xf) {
6092 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6093 emit->clip_dist_out_index + i);
6094 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6095
6096 /* MOV CLIPDIST, tmp_clip_dist */
6097 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6098 &tmp_clip_dist_src);
6099 }
6100 /* four clip planes per clip register */
6101 clip_plane_enable >>= 4;
6102 }
6103 /**
6104 * set the temporary clip dist register index back to the
6105 * temporary index for the next vertex
6106 */
6107 emit->clip_dist_tmp_index = clip_dist_tmp_index;
6108 }
6109
6110 /* Declare clip distance output registers for user-defined clip planes
6111 * or the TGSI_CLIPVERTEX output.
6112 */
6113 static void
6114 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6115 {
6116 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6117 unsigned index = emit->num_outputs;
6118 unsigned plane_mask;
6119
6120 assert(emit->unit != PIPE_SHADER_FRAGMENT);
6121 assert(num_clip_planes <= 8);
6122
6123 if (emit->clip_mode != CLIP_LEGACY &&
6124 emit->clip_mode != CLIP_VERTEX) {
6125 return;
6126 }
6127
6128 if (num_clip_planes == 0)
6129 return;
6130
6131 /* Convert clip vertex to clip distances only in the last vertex stage */
6132 if (!emit->key.last_vertex_stage)
6133 return;
6134
6135 /* Declare one or two clip output registers. The number of components
6136 * in the mask reflects the number of clip planes. For example, if 5
6137 * clip planes are needed, we'll declare outputs similar to:
6138 * dcl_output_siv o2.xyzw, clip_distance
6139 * dcl_output_siv o3.x, clip_distance
6140 */
6141 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6142
6143 plane_mask = (1 << num_clip_planes) - 1;
6144 if (plane_mask & 0xf) {
6145 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6146 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6147 VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6148 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6149 emit->num_outputs++;
6150 }
6151 if (plane_mask & 0xf0) {
6152 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6153 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6154 VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6155 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6156 emit->num_outputs++;
6157 }
6158 }
6159
6160
6161 /**
6162 * Emit the instructions for writing to the clip distance registers
6163 * to handle legacy/automatic clip planes.
6164 * For each clip plane, the distance is the dot product of the vertex
6165 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6166 * This is not used when the shader already declares explicit CLIPVERTEX
6167 * or CLIPDISTANCE output registers.
6168 */
6169 static void
6170 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6171 unsigned vpos_tmp_index)
6172 {
6173 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6174
6175 assert(emit->clip_mode == CLIP_LEGACY);
6176 assert(num_clip_planes <= 8);
6177
6178 assert(emit->unit == PIPE_SHADER_VERTEX ||
6179 emit->unit == PIPE_SHADER_GEOMETRY ||
6180 emit->unit == PIPE_SHADER_TESS_EVAL);
6181
6182 for (i = 0; i < num_clip_planes; i++) {
6183 struct tgsi_full_dst_register dst;
6184 struct tgsi_full_src_register plane_src, vpos_src;
6185 unsigned reg_index = emit->clip_dist_out_index + i / 4;
6186 unsigned comp = i % 4;
6187 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6188
6189 /* create dst, src regs */
6190 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6191 dst = writemask_dst(&dst, writemask);
6192
6193 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6194 vpos_src = make_src_temp_reg(vpos_tmp_index);
6195
6196 /* DP4 clip_dist, plane, vpos */
6197 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6198 &plane_src, &vpos_src);
6199 }
6200 }
6201
6202
6203 /**
6204 * Emit the instructions for computing the clip distance results from
6205 * the clip vertex temporary.
6206 * For each clip plane, the distance is the dot product of the clip vertex
6207 * position (found in a temp reg) and the clip plane coefficients.
6208 */
6209 static void
6210 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6211 {
6212 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6213 unsigned i;
6214 struct tgsi_full_dst_register dst;
6215 struct tgsi_full_src_register clipvert_src;
6216 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6217
6218 assert(emit->unit == PIPE_SHADER_VERTEX ||
6219 emit->unit == PIPE_SHADER_GEOMETRY ||
6220 emit->unit == PIPE_SHADER_TESS_EVAL);
6221
6222 assert(emit->clip_mode == CLIP_VERTEX);
6223
6224 clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6225
6226 for (i = 0; i < num_clip; i++) {
6227 struct tgsi_full_src_register plane_src;
6228 unsigned reg_index = emit->clip_dist_out_index + i / 4;
6229 unsigned comp = i % 4;
6230 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6231
6232 /* create dst, src regs */
6233 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6234 dst = writemask_dst(&dst, writemask);
6235
6236 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6237
6238 /* DP4 clip_dist, plane, vpos */
6239 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6240 &plane_src, &clipvert_src);
6241 }
6242
6243 /* copy temporary clip vertex register to the clip vertex register */
6244
6245 assert(emit->clip_vertex_out_index != INVALID_INDEX);
6246
6247 /**
6248 * Temporarily reset the temporary clip vertex register index so
6249 * that the copy to the clip vertex register will not attempt
6250 * to copy to the temporary register again.
6251 */
6252 emit->clip_vertex_tmp_index = INVALID_INDEX;
6253
6254 /* MOV clip_vertex, clip_vertex_tmp */
6255 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6256 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6257 &dst, &clipvert_src);
6258
6259 /**
6260 * set the temporary clip vertex register index back to the
6261 * temporary index for the next vertex
6262 */
6263 emit->clip_vertex_tmp_index = clip_vertex_tmp;
6264 }
6265
6266 /**
6267 * Emit code to convert RGBA to BGRA
6268 */
6269 static void
6270 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6271 const struct tgsi_full_dst_register *dst,
6272 const struct tgsi_full_src_register *src)
6273 {
6274 struct tgsi_full_src_register bgra_src =
6275 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6276
6277 begin_emit_instruction(emit);
6278 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
6279 emit_dst_register(emit, dst);
6280 emit_src_register(emit, &bgra_src);
6281 end_emit_instruction(emit);
6282 }
6283
6284
6285 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6286 static void
6287 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6288 const struct tgsi_full_dst_register *dst,
6289 const struct tgsi_full_src_register *src)
6290 {
6291 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6292 struct tgsi_full_src_register two =
6293 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6294 struct tgsi_full_src_register neg_two =
6295 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6296
6297 unsigned val_tmp = get_temp_index(emit);
6298 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6299 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6300
6301 unsigned bias_tmp = get_temp_index(emit);
6302 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6303 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6304
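/* Worked example (a sketch): for a component with unorm value 0.75,
 * val = 0.75 * 2.0 = 1.5 and, since 0.75 > 0.5, bias = -2.0, so
 * dst = -0.5, which approximates reinterpreting the same packed bits
 * as snorm.
 */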
6305 /* val = src * 2.0 */
6306 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6307
6308 /* bias = src > 0.5 */
6309 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6310
6311 /* bias = bias & -2.0 */
6312 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6313 &bias_src, &neg_two);
6314
6315 /* dst = val + bias */
6316 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
6317 &val_src, &bias_src);
6318
6319 free_temp_indexes(emit);
6320 }
6321
6322
6323 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
6324 static void
6325 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
6326 const struct tgsi_full_dst_register *dst,
6327 const struct tgsi_full_src_register *src)
6328 {
6329 struct tgsi_full_src_register scale =
6330 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
6331
6332 /* dst = src * scale */
6333 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
6334 }
6335
6336
6337 /** Convert from R32_UINT to 10_10_10_2_sscaled */
6338 static void
6339 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
6340 const struct tgsi_full_dst_register *dst,
6341 const struct tgsi_full_src_register *src)
6342 {
6343 struct tgsi_full_src_register lshift =
6344 make_immediate_reg_int4(emit, 22, 12, 2, 0);
6345 struct tgsi_full_src_register rshift =
6346 make_immediate_reg_int4(emit, 22, 22, 22, 30);
6347
6348 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
6349
6350 unsigned tmp = get_temp_index(emit);
6351 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6352 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6353
6354 /*
6355 * r = (pixel << 22) >> 22; # signed int in [-512, 511]
6356 * g = (pixel << 12) >> 22; # signed int in [-512, 511]
6357 * b = (pixel << 2) >> 22; # signed int in [-512, 511]
6358 * a = (pixel << 0) >> 30; # signed int in [-2, 1]
6359 * dst = i_to_f(r,g,b,a); # convert to float
6360 */
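/* Note: the left shift moves each packed field up to the sign bit and the
 * arithmetic right shift (ISHR) sign-extends it back down. For example, a
 * 10-bit red field of 0x3ff becomes 0xffc00000 after "<< 22" and -1 after
 * the ">> 22".
 */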
6361 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
6362 &src_xxxx, &lshift);
6363 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
6364 &tmp_src, &rshift);
6365 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
6366
6367 free_temp_indexes(emit);
6368 }
6369
6370
6371 /**
6372 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
6373 */
6374 static boolean
6375 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
6376 const struct tgsi_full_instruction *inst)
6377 {
6378 unsigned index = inst->Dst[0].Register.Index;
6379 struct tgsi_full_dst_register dst;
6380 VGPU10_OPCODE_TYPE opcode;
6381
6382 assert(index < MAX_VGPU10_ADDR_REGS);
6383 dst = make_dst_temp_reg(emit->address_reg_index[index]);
6384 dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
6385
6386 /* ARL dst, s0
6387 * Translates into:
6388 * FTOI address_tmp, s0
6389 *
6390 * UARL dst, s0
6391 * Translates into:
6392 * MOV address_tmp, s0
6393 */
6394 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
6395 opcode = VGPU10_OPCODE_FTOI;
6396 else
6397 opcode = VGPU10_OPCODE_MOV;
6398
6399 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
6400
6401 return TRUE;
6402 }
6403
6404
6405 /**
6406 * Emit code for TGSI_OPCODE_CAL instruction.
6407 */
6408 static boolean
6409 emit_cal(struct svga_shader_emitter_v10 *emit,
6410 const struct tgsi_full_instruction *inst)
6411 {
6412 unsigned label = inst->Label.Label;
6413 VGPU10OperandToken0 operand;
6414 operand.value = 0;
6415 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
6416
6417 begin_emit_instruction(emit);
6418 emit_dword(emit, operand.value);
6419 emit_dword(emit, label);
6420 end_emit_instruction(emit);
6421
6422 return TRUE;
6423 }
6424
6425
6426 /**
6427 * Emit code for TGSI_OPCODE_IABS instruction.
6428 */
6429 static boolean
6430 emit_iabs(struct svga_shader_emitter_v10 *emit,
6431 const struct tgsi_full_instruction *inst)
6432 {
6433 /* dst.x = (src0.x < 0) ? -src0.x : src0.x
6434 * dst.y = (src0.y < 0) ? -src0.y : src0.y
6435 * dst.z = (src0.z < 0) ? -src0.z : src0.z
6436 * dst.w = (src0.w < 0) ? -src0.w : src0.w
6437 *
6438 * Translates into
6439 * IMAX dst, src, neg(src)
6440 */
6441 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
6442 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
6443 &inst->Src[0], &neg_src);
6444
6445 return TRUE;
6446 }
6447
6448
6449 /**
6450 * Emit code for TGSI_OPCODE_CMP instruction.
6451 */
6452 static boolean
6453 emit_cmp(struct svga_shader_emitter_v10 *emit,
6454 const struct tgsi_full_instruction *inst)
6455 {
6456 /* dst.x = (src0.x < 0) ? src1.x : src2.x
6457 * dst.y = (src0.y < 0) ? src1.y : src2.y
6458 * dst.z = (src0.z < 0) ? src1.z : src2.z
6459 * dst.w = (src0.w < 0) ? src1.w : src2.w
6460 *
6461 * Translates into
6462 * LT tmp, src0, 0.0
6463 * MOVC dst, tmp, src1, src2
6464 */
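   /* LT writes 0xffffffff per component where the comparison holds and 0
    * where it does not, so MOVC can then select src1 for the "true"
    * components and src2 for the rest.
    */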
6465 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6466 unsigned tmp = get_temp_index(emit);
6467 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6468 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6469
6470 emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
6471 &inst->Src[0], &zero, NULL, FALSE,
6472 inst->Instruction.Precise);
6473 emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
6474 &tmp_src, &inst->Src[1], &inst->Src[2],
6475 inst->Instruction.Saturate, FALSE);
6476
6477 free_temp_indexes(emit);
6478
6479 return TRUE;
6480 }
6481
6482
6483 /**
6484 * Emit code for TGSI_OPCODE_DST instruction.
6485 */
6486 static boolean
6487 emit_dst(struct svga_shader_emitter_v10 *emit,
6488 const struct tgsi_full_instruction *inst)
6489 {
6490 /*
6491 * dst.x = 1
6492 * dst.y = src0.y * src1.y
6493 * dst.z = src0.z
6494 * dst.w = src1.w
6495 */
6496
6497 struct tgsi_full_src_register s0_yyyy =
6498 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6499 struct tgsi_full_src_register s0_zzzz =
6500 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
6501 struct tgsi_full_src_register s1_yyyy =
6502 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
6503 struct tgsi_full_src_register s1_wwww =
6504 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
6505
6506 /*
6507 * If dst is the same register as src0 or src1, we need
6508 * to create a temporary for the result and insert an extra move.
6509 */
6510 unsigned tmp_move = get_temp_index(emit);
6511 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6512 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6513
6514 /* MOV dst.x, 1.0 */
6515 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6516 struct tgsi_full_dst_register dst_x =
6517 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6518 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6519
6520 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6521 }
6522
6523 /* MUL dst.y, s0.y, s1.y */
6524 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6525 struct tgsi_full_dst_register dst_y =
6526 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6527
6528 emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
6529 &s1_yyyy, NULL, inst->Instruction.Saturate,
6530 inst->Instruction.Precise);
6531 }
6532
6533 /* MOV dst.z, s0.z */
6534 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6535 struct tgsi_full_dst_register dst_z =
6536 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6537
6538 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6539 &dst_z, &s0_zzzz, NULL, NULL,
6540 inst->Instruction.Saturate,
6541 inst->Instruction.Precise);
6542 }
6543
6544 /* MOV dst.w, s1.w */
6545 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6546 struct tgsi_full_dst_register dst_w =
6547 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6548
6549 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6550 &dst_w, &s1_wwww, NULL, NULL,
6551 inst->Instruction.Saturate,
6552 inst->Instruction.Precise);
6553 }
6554
6555 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6556 free_temp_indexes(emit);
6557
6558 return TRUE;
6559 }
6560
6561
6562 /**
6563 * A helper function to return the stream index as specified in
6564 * the immediate register
6565 */
6566 static inline unsigned
6567 find_stream_index(struct svga_shader_emitter_v10 *emit,
6568 const struct tgsi_full_src_register *src)
6569 {
6570 return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
6571 }
6572
6573
6574 /**
6575 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
6576 */
6577 static boolean
6578 emit_endprim(struct svga_shader_emitter_v10 *emit,
6579 const struct tgsi_full_instruction *inst)
6580 {
6581 assert(emit->unit == PIPE_SHADER_GEOMETRY);
6582
6583 begin_emit_instruction(emit);
6584 if (emit->version >= 50) {
6585 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
6586
6587 if (emit->info.num_stream_output_components[streamIndex] == 0) {
6588 /**
6589 * If there is no output for this stream, discard this instruction.
6590 */
6591 emit->discard_instruction = TRUE;
6592 }
6593 else {
6594 emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE);
6595 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
6596 emit_stream_register(emit, streamIndex);
6597 }
6598 }
6599 else {
6600 emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
6601 }
6602 end_emit_instruction(emit);
6603 return TRUE;
6604 }
6605
6606
6607 /**
6608 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
6609 */
6610 static boolean
6611 emit_ex2(struct svga_shader_emitter_v10 *emit,
6612 const struct tgsi_full_instruction *inst)
6613 {
6614 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
6615 * while VGPU10 computes four values.
6616 *
6617 * dst = EX2(src):
6618 * dst.xyzw = 2.0 ^ src.x
6619 */
6620
6621 struct tgsi_full_src_register src_xxxx =
6622 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6623 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6624
6625 /* EXP dst, s0.xxxx */
6626 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
6627 NULL, NULL,
6628 inst->Instruction.Saturate,
6629 inst->Instruction.Precise);
6630
6631 return TRUE;
6632 }
6633
6634
6635 /**
6636 * Emit code for TGSI_OPCODE_EXP instruction.
6637 */
6638 static boolean
6639 emit_exp(struct svga_shader_emitter_v10 *emit,
6640 const struct tgsi_full_instruction *inst)
6641 {
6642 /*
6643 * dst.x = 2 ^ floor(s0.x)
6644 * dst.y = s0.x - floor(s0.x)
6645 * dst.z = 2 ^ s0.x
6646 * dst.w = 1.0
6647 */
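   /* Worked example: s0.x = 2.5 gives dst.x = 2^2 = 4.0, dst.y = 0.5,
    * dst.z = 2^2.5 (~5.66) and dst.w = 1.0.
    */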
6648
6649 struct tgsi_full_src_register src_xxxx =
6650 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6651 unsigned tmp = get_temp_index(emit);
6652 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6653 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6654
6655 /*
6656 * If dst and src are the same register, we need to create
6657 * a temporary for the result and insert an extra move.
6658 */
6659 unsigned tmp_move = get_temp_index(emit);
6660 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6661 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6662
6663 /* only use X component of temp reg */
6664 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6665 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6666
6667 /* ROUND_NI tmp.x, s0.x */
6668 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
6669 &src_xxxx); /* round to -infinity */
6670
6671 /* EXP dst.x, tmp.x */
6672 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6673 struct tgsi_full_dst_register dst_x =
6674 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6675
6676 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
6677 NULL, NULL,
6678 inst->Instruction.Saturate,
6679 inst->Instruction.Precise);
6680 }
6681
6682 /* ADD dst.y, s0.x, -tmp */
6683 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6684 struct tgsi_full_dst_register dst_y =
6685 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6686 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
6687
6688 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
6689 &neg_tmp_src, NULL,
6690 inst->Instruction.Saturate,
6691 inst->Instruction.Precise);
6692 }
6693
6694 /* EXP dst.z, s0.x */
6695 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6696 struct tgsi_full_dst_register dst_z =
6697 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6698
6699 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
6700 NULL, NULL,
6701 inst->Instruction.Saturate,
6702 inst->Instruction.Precise);
6703 }
6704
6705 /* MOV dst.w, 1.0 */
6706 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6707 struct tgsi_full_dst_register dst_w =
6708 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6709 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6710
6711 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6712 }
6713
6714 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6715
6716 free_temp_indexes(emit);
6717
6718 return TRUE;
6719 }
6720
6721
6722 /**
6723 * Emit code for TGSI_OPCODE_IF instruction.
6724 */
6725 static boolean
6726 emit_if(struct svga_shader_emitter_v10 *emit,
6727 const struct tgsi_full_src_register *src)
6728 {
6729 VGPU10OpcodeToken0 opcode0;
6730
6731 /* The src register should be a scalar */
6732 assert(src->Register.SwizzleX == src->Register.SwizzleY &&
6733 src->Register.SwizzleX == src->Register.SwizzleZ &&
6734 src->Register.SwizzleX == src->Register.SwizzleW);
6735
6736 /* The only special thing here is that we need to set the
6737 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
6738 * src.x is non-zero.
6739 */
6740 opcode0.value = 0;
6741 opcode0.opcodeType = VGPU10_OPCODE_IF;
6742 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
6743
6744 begin_emit_instruction(emit);
6745 emit_dword(emit, opcode0.value);
6746 emit_src_register(emit, src);
6747 end_emit_instruction(emit);
6748
6749 return TRUE;
6750 }
6751
6752
6753 /**
6754 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
6755 * the register components are negative).
6756 */
6757 static boolean
6758 emit_kill_if(struct svga_shader_emitter_v10 *emit,
6759 const struct tgsi_full_instruction *inst)
6760 {
6761 unsigned tmp = get_temp_index(emit);
6762 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6763 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6764
6765 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6766
6767 struct tgsi_full_dst_register tmp_dst_x =
6768 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6769 struct tgsi_full_src_register tmp_src_xxxx =
6770 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6771
6772 /* tmp = src[0] < 0.0 */
6773 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
6774
6775 if (!same_swizzle_terms(&inst->Src[0])) {
6776 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
6777 * logically OR the swizzle terms. Most uses of KILL_IF only
6778 * test one channel so it's good to avoid these extra steps.
6779 */
6780 struct tgsi_full_src_register tmp_src_yyyy =
6781 scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
6782 struct tgsi_full_src_register tmp_src_zzzz =
6783 scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
6784 struct tgsi_full_src_register tmp_src_wwww =
6785 scalar_src(&tmp_src, TGSI_SWIZZLE_W);
6786
6787 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6788 &tmp_src_yyyy);
6789 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6790 &tmp_src_zzzz);
6791 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6792 &tmp_src_wwww);
6793 }
6794
6795 begin_emit_instruction(emit);
6796 emit_discard_opcode(emit, TRUE); /* discard if tmp.x is non-zero */
6797 emit_src_register(emit, &tmp_src_xxxx);
6798 end_emit_instruction(emit);
6799
6800 free_temp_indexes(emit);
6801
6802 return TRUE;
6803 }
6804
6805
6806 /**
6807 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
6808 */
6809 static boolean
6810 emit_kill(struct svga_shader_emitter_v10 *emit,
6811 const struct tgsi_full_instruction *inst)
6812 {
6813 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6814
6815 /* DISCARD if 0.0 is zero, i.e. always discard */
6816 begin_emit_instruction(emit);
6817 emit_discard_opcode(emit, FALSE);
6818 emit_src_register(emit, &zero);
6819 end_emit_instruction(emit);
6820
6821 return TRUE;
6822 }
6823
6824
6825 /**
6826 * Emit code for TGSI_OPCODE_LG2 instruction.
6827 */
6828 static boolean
6829 emit_lg2(struct svga_shader_emitter_v10 *emit,
6830 const struct tgsi_full_instruction *inst)
6831 {
6832 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
6833 * while VGPU10 computes four values.
6834 *
6835 * dst = LG2(src):
6836 * dst.xyzw = log2(src.x)
6837 */
6838
6839 struct tgsi_full_src_register src_xxxx =
6840 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6841 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6842
6843 /* LOG dst, s0.xxxx */
6844 emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
6845 &inst->Dst[0], &src_xxxx, NULL, NULL,
6846 inst->Instruction.Saturate,
6847 inst->Instruction.Precise);
6848
6849 return TRUE;
6850 }
6851
6852
6853 /**
6854 * Emit code for TGSI_OPCODE_LIT instruction.
6855 */
6856 static boolean
6857 emit_lit(struct svga_shader_emitter_v10 *emit,
6858 const struct tgsi_full_instruction *inst)
6859 {
6860 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6861
6862 /*
6863 * If dst and src are the same register, we need to create
6864 * a temporary for the result and insert an extra move.
6865 */
6866 unsigned tmp_move = get_temp_index(emit);
6867 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6868 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6869
6870 /*
6871 * dst.x = 1
6872 * dst.y = max(src.x, 0)
6873 * dst.z = (src.x > 0) ? max(src.y, 0)^clamp(src.w, -128, 128) : 0
6874 * dst.w = 1
6875 */
6876
6877 /* MOV dst.x, 1.0 */
6878 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6879 struct tgsi_full_dst_register dst_x =
6880 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6881 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6882 }
6883
6884 /* MOV dst.w, 1.0 */
6885 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6886 struct tgsi_full_dst_register dst_w =
6887 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6888 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6889 }
6890
6891 /* MAX dst.y, src.x, 0.0 */
6892 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6893 struct tgsi_full_dst_register dst_y =
6894 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6895 struct tgsi_full_src_register zero =
6896 make_immediate_reg_float(emit, 0.0f);
6897 struct tgsi_full_src_register src_xxxx =
6898 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6899 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6900
6901 emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
6902 &zero, NULL, inst->Instruction.Saturate, FALSE);
6903 }
6904
6905 /*
6906 * tmp1 = clamp(src.w, -128, 128);
6907 * MAX tmp1, src.w, -128
6908 * MIN tmp1, tmp1, 128
6909 *
6910 * tmp2 = max(src.y, 0);
6911 * MAX tmp2, src.y, 0
6912 *
6913 * tmp1 = pow(tmp2, tmp1);
6914 * LOG tmp2, tmp2
6915 * MUL tmp1, tmp2, tmp1
6916 * EXP tmp1, tmp1
6917 *
6918 * tmp1 = (src.w == 0) ? 1 : tmp1;
6919 * EQ tmp2, 0, src.w
6920 * MOVC tmp1, tmp2, 1.0, tmp1
6921 *
6922 * dst.z = (0 < src.x) ? tmp1 : 0;
6923 * LT tmp2, 0, src.x
6924 * MOVC dst.z, tmp2, tmp1, 0.0
6925 */
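   /* Worked example: src = (1.0, 0.5, x, 2.0) yields
    * dst.z = max(0.5, 0)^clamp(2.0, -128, 128) = 0.25, computed below as
    * exp2(2.0 * log2(0.5)).  The EQ/MOVC pair forces the result to 1.0
    * when src.w == 0, where the LOG/MUL/EXP path could otherwise yield NaN.
    */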
6926 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6927 struct tgsi_full_dst_register dst_z =
6928 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6929
6930 unsigned tmp1 = get_temp_index(emit);
6931 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
6932 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
6933 unsigned tmp2 = get_temp_index(emit);
6934 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
6935 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
6936
6937 struct tgsi_full_src_register src_xxxx =
6938 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6939 struct tgsi_full_src_register src_yyyy =
6940 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6941 struct tgsi_full_src_register src_wwww =
6942 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
6943
6944 struct tgsi_full_src_register zero =
6945 make_immediate_reg_float(emit, 0.0f);
6946 struct tgsi_full_src_register lowerbound =
6947 make_immediate_reg_float(emit, -128.0f);
6948 struct tgsi_full_src_register upperbound =
6949 make_immediate_reg_float(emit, 128.0f);
6950
6951 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
6952 &lowerbound);
6953 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
6954 &upperbound);
6955 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
6956 &zero);
6957
6958 /* POW tmp1, tmp2, tmp1 */
6959 /* LOG tmp2, tmp2 */
6960 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
6961
6962 /* MUL tmp1, tmp2, tmp1 */
6963 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
6964 &tmp1_src);
6965
6966 /* EXP tmp1, tmp1 */
6967 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
6968
6969 /* EQ tmp2, 0, src.w */
6970 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
6971 /* MOVC tmp1, tmp2, 1.0, tmp1 */
6972 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
6973 &tmp2_src, &one, &tmp1_src);
6974
6975 /* LT tmp2, 0, src.x */
6976 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
6977 /* MOVC dst.z, tmp2, tmp1, 0.0 */
6978 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
6979 &tmp2_src, &tmp1_src, &zero);
6980 }
6981
6982 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6983 free_temp_indexes(emit);
6984
6985 return TRUE;
6986 }
6987
6988
6989 /**
6990 * Emit Level Of Detail Query (LODQ) instruction.
6991 */
6992 static boolean
6993 emit_lodq(struct svga_shader_emitter_v10 *emit,
6994 const struct tgsi_full_instruction *inst)
6995 {
6996 const uint unit = inst->Src[1].Register.Index;
6997
6998 assert(emit->version >= 41);
6999
7000 /* LOD dst, coord, resource, sampler */
7001 begin_emit_instruction(emit);
7002 emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE);
7003 emit_dst_register(emit, &inst->Dst[0]);
7004 emit_src_register(emit, &inst->Src[0]); /* coord */
7005 emit_resource_register(emit, unit);
7006 emit_sampler_register(emit, unit);
7007 end_emit_instruction(emit);
7008
7009 return TRUE;
7010 }
7011
7012
7013 /**
7014 * Emit code for TGSI_OPCODE_LOG instruction.
7015 */
7016 static boolean
7017 emit_log(struct svga_shader_emitter_v10 *emit,
7018 const struct tgsi_full_instruction *inst)
7019 {
7020 /*
7021 * dst.x = floor(lg2(abs(s0.x)))
7022 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
7023 * dst.z = lg2(abs(s0.x))
7024 * dst.w = 1.0
7025 */
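   /* Worked example: s0.x = 12.0 gives lg2(12) ~= 3.585, so dst.x = 3.0,
    * dst.y = 12.0 / 2^3 = 1.5, dst.z ~= 3.585 and dst.w = 1.0.
    */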
7026
7027 struct tgsi_full_src_register src_xxxx =
7028 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7029 unsigned tmp = get_temp_index(emit);
7030 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7031 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7032 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
7033
7034 /* only use X component of temp reg */
7035 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7036 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7037
7038 /* LOG tmp.x, abs(s0.x) */
7039 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
7040 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
7041 }
7042
7043 /* MOV dst.z, tmp.x */
7044 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7045 struct tgsi_full_dst_register dst_z =
7046 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
7047
7048 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7049 &dst_z, &tmp_src, NULL, NULL,
7050 inst->Instruction.Saturate, FALSE);
7051 }
7052
7053 /* FLR tmp.x, tmp.x */
7054 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7055 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7056 }
7057
7058 /* MOV dst.x, tmp.x */
7059 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7060 struct tgsi_full_dst_register dst_x =
7061 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7062
7063 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7064 &dst_x, &tmp_src, NULL, NULL,
7065 inst->Instruction.Saturate, FALSE);
7066 }
7067
7068 /* EXP tmp.x, tmp.x */
7069 /* DIV dst.y, abs(s0.x), tmp.x */
7070 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7071 struct tgsi_full_dst_register dst_y =
7072 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7073
7074 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7075 emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7076 &tmp_src, NULL, inst->Instruction.Saturate, FALSE);
7077 }
7078
7079 /* MOV dst.w, 1.0 */
7080 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7081 struct tgsi_full_dst_register dst_w =
7082 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7083 struct tgsi_full_src_register one =
7084 make_immediate_reg_float(emit, 1.0f);
7085
7086 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7087 }
7088
7089 free_temp_indexes(emit);
7090
7091 return TRUE;
7092 }
7093
7094
7095 /**
7096 * Emit code for TGSI_OPCODE_LRP instruction.
7097 */
7098 static boolean
7099 emit_lrp(struct svga_shader_emitter_v10 *emit,
7100 const struct tgsi_full_instruction *inst)
7101 {
7102 /* dst = LRP(s0, s1, s2):
7103 * dst = s0 * (s1 - s2) + s2
7104 * Translates into:
7105 * ADD tmp, s1, -s2; tmp = s1 - s2
7106 * MAD dst, s0, tmp, s2; dst = s0 * tmp + s2
7107 */
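   /* This relies on the usual lerp identity:
    *   s0*s1 + (1 - s0)*s2  =  s0*(s1 - s2) + s2
    * so the whole operation needs only one ADD and one MAD.
    */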
7108 unsigned tmp = get_temp_index(emit);
7109 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7110 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7111 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7112
7113 /* ADD tmp, s1, -s2 */
7114 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7115 &inst->Src[1], &neg_src2, NULL, FALSE,
7116 inst->Instruction.Precise);
7117
7118 /* MAD dst, s0, tmp, s2 */
7119 emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7120 &inst->Src[0], &src_tmp, &inst->Src[2],
7121 inst->Instruction.Saturate,
7122 inst->Instruction.Precise);
7123
7124 free_temp_indexes(emit);
7125
7126 return TRUE;
7127 }
7128
7129
7130 /**
7131 * Emit code for TGSI_OPCODE_POW instruction.
7132 */
7133 static boolean
7134 emit_pow(struct svga_shader_emitter_v10 *emit,
7135 const struct tgsi_full_instruction *inst)
7136 {
7137 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7138 * src1.x while VGPU10 computes four values.
7139 *
7140 * dst = POW(src0, src1):
7141 * dst.xyzw = src0.x ^ src1.x
7142 */
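   /* Uses the identity src0.x^src1.x = exp2(src1.x * log2(src0.x)), which
    * holds for positive bases; as with GLSL pow(), the result for a
    * negative base is undefined, so no special handling is done here.
    */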
7143 unsigned tmp = get_temp_index(emit);
7144 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7145 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7146 struct tgsi_full_src_register src0_xxxx =
7147 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7148 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7149 struct tgsi_full_src_register src1_xxxx =
7150 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7151 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7152
7153 /* LOG tmp, s0.xxxx */
7154 emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7155 &tmp_dst, &src0_xxxx, NULL, NULL,
7156 FALSE, inst->Instruction.Precise);
7157
7158 /* MUL tmp, tmp, s1.xxxx */
7159 emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7160 &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7161 FALSE, inst->Instruction.Precise);
7162
7163 /* EXP dst, tmp */
7164 emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7165 &inst->Dst[0], &tmp_src, NULL, NULL,
7166 inst->Instruction.Saturate,
7167 inst->Instruction.Precise);
7168
7169 /* free tmp */
7170 free_temp_indexes(emit);
7171
7172 return TRUE;
7173 }
7174
7175
7176 /**
7177 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7178 */
7179 static boolean
7180 emit_rcp(struct svga_shader_emitter_v10 *emit,
7181 const struct tgsi_full_instruction *inst)
7182 {
7183 if (emit->version >= 50) {
7184 /* Use the native RCP instruction. VGPU10_OPCODE_RCP is component-wise,
7185 * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.x, so we
7186 * replicate the X component across the src register's swizzle.
7187 */
7188 struct tgsi_full_src_register src = inst->Src[0];
7189 src.Register.SwizzleY =
7190 src.Register.SwizzleZ =
7191 src.Register.SwizzleW = src.Register.SwizzleX;
7192
7193 begin_emit_instruction(emit);
7194 emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7195 inst->Instruction.Saturate,
7196 inst->Instruction.Precise);
7197 emit_dst_register(emit, &inst->Dst[0]);
7198 emit_src_register(emit, &src);
7199 end_emit_instruction(emit);
7200 }
7201 else {
7202 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7203
7204 unsigned tmp = get_temp_index(emit);
7205 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7206 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7207
7208 struct tgsi_full_dst_register tmp_dst_x =
7209 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7210 struct tgsi_full_src_register tmp_src_xxxx =
7211 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7212
7213 /* DIV tmp.x, 1.0, s0 */
7214 emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7215 &tmp_dst_x, &one, &inst->Src[0], NULL,
7216 FALSE, inst->Instruction.Precise);
7217
7218 /* MOV dst, tmp.xxxx */
7219 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7220 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7221 inst->Instruction.Saturate,
7222 inst->Instruction.Precise);
7223
7224 free_temp_indexes(emit);
7225 }
7226
7227 return TRUE;
7228 }
7229
7230
7231 /**
7232 * Emit code for TGSI_OPCODE_RSQ instruction.
7233 */
7234 static boolean
7235 emit_rsq(struct svga_shader_emitter_v10 *emit,
7236 const struct tgsi_full_instruction *inst)
7237 {
7238 /* dst = RSQ(src):
7239 * dst.xyzw = 1 / sqrt(src.x)
7240 * Translates into:
7241 * RSQ tmp, src.x
7242 * MOV dst, tmp.xxxx
7243 */
7244
7245 unsigned tmp = get_temp_index(emit);
7246 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7247 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7248
7249 struct tgsi_full_dst_register tmp_dst_x =
7250 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7251 struct tgsi_full_src_register tmp_src_xxxx =
7252 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7253
7254 /* RSQ tmp, src.x */
7255 emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7256 &tmp_dst_x, &inst->Src[0], NULL, NULL,
7257 FALSE, inst->Instruction.Precise);
7258
7259 /* MOV dst, tmp.xxxx */
7260 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7261 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7262 inst->Instruction.Saturate,
7263 inst->Instruction.Precise);
7264
7265 /* free tmp */
7266 free_temp_indexes(emit);
7267
7268 return TRUE;
7269 }
7270
7271
7272 /**
7273 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7274 */
7275 static boolean
7276 emit_seq(struct svga_shader_emitter_v10 *emit,
7277 const struct tgsi_full_instruction *inst)
7278 {
7279 /* dst = SEQ(s0, s1):
7280 * dst = s0 == s1 ? 1.0 : 0.0 (per component)
7281 * Translates into:
7282 * EQ tmp, s0, s1; tmp = s0 == s1 ? 0xffffffff : 0 (per comp)
7283 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7284 */
7285 unsigned tmp = get_temp_index(emit);
7286 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7287 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7288 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7289 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7290
7291 /* EQ tmp, s0, s1 */
7292 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7293 &inst->Src[1]);
7294
7295 /* MOVC dst, tmp, one, zero */
7296 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7297 &one, &zero);
7298
7299 free_temp_indexes(emit);
7300
7301 return TRUE;
7302 }
7303
7304
7305 /**
7306 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7307 */
7308 static boolean
7309 emit_sge(struct svga_shader_emitter_v10 *emit,
7310 const struct tgsi_full_instruction *inst)
7311 {
7312 /* dst = SGE(s0, s1):
7313 * dst = s0 >= s1 ? 1.0 : 0.0 (per component)
7314 * Translates into:
7315 * GE tmp, s0, s1; tmp = s0 >= s1 ? 0xffffffff : 0 (per comp)
7316 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7317 */
7318 unsigned tmp = get_temp_index(emit);
7319 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7320 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7321 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7322 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7323
7324 /* GE tmp, s0, s1 */
7325 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
7326 &inst->Src[1]);
7327
7328 /* MOVC dst, tmp, one, zero */
7329 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7330 &one, &zero);
7331
7332 free_temp_indexes(emit);
7333
7334 return TRUE;
7335 }
7336
7337
7338 /**
7339 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
7340 */
7341 static boolean
7342 emit_sgt(struct svga_shader_emitter_v10 *emit,
7343 const struct tgsi_full_instruction *inst)
7344 {
7345 /* dst = SGT(s0, s1):
7346 * dst = s0 > s1 ? 1.0 : 0.0 (per component)
7347 * Translates into:
7348 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
7349 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7350 */
7351 unsigned tmp = get_temp_index(emit);
7352 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7353 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7354 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7355 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7356
7357 /* LT tmp, s1, s0 */
7358 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
7359 &inst->Src[0]);
7360
7361 /* MOVC dst, tmp, one, zero */
7362 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7363 &one, &zero);
7364
7365 free_temp_indexes(emit);
7366
7367 return TRUE;
7368 }
7369
7370
7371 /**
7372 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
7373 */
7374 static boolean
7375 emit_sincos(struct svga_shader_emitter_v10 *emit,
7376 const struct tgsi_full_instruction *inst)
7377 {
7378 unsigned tmp = get_temp_index(emit);
7379 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7380 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7381
7382 struct tgsi_full_src_register tmp_src_xxxx =
7383 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7384 struct tgsi_full_dst_register tmp_dst_x =
7385 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7386
7387 begin_emit_instruction(emit);
7388 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
7389
7390 if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
7391 {
7392 emit_dst_register(emit, &tmp_dst_x); /* first destination register */
7393 emit_null_dst_register(emit); /* second destination register */
7394 }
7395 else {
7396 emit_null_dst_register(emit);
7397 emit_dst_register(emit, &tmp_dst_x);
7398 }
7399
7400 emit_src_register(emit, &inst->Src[0]);
7401 end_emit_instruction(emit);
7402
7403 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7404 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7405 inst->Instruction.Saturate,
7406 inst->Instruction.Precise);
7407
7408 free_temp_indexes(emit);
7409
7410 return TRUE;
7411 }
7412
7413
7414 /**
7415 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
7416 */
7417 static boolean
7418 emit_sle(struct svga_shader_emitter_v10 *emit,
7419 const struct tgsi_full_instruction *inst)
7420 {
7421 /* dst = SLE(s0, s1):
7422 * dst = s0 <= s1 ? 1.0 : 0.0 (per component)
7423 * Translates into:
7424 * GE tmp, s1, s0; tmp = s1 >= s0 ? 0xffffffff : 0 (per comp)
7425 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7426 */
7427 unsigned tmp = get_temp_index(emit);
7428 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7429 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7430 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7431 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7432
7433 /* GE tmp, s1, s0 */
7434 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
7435 &inst->Src[0]);
7436
7437 /* MOVC dst, tmp, one, zero */
7438 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7439 &one, &zero);
7440
7441 free_temp_indexes(emit);
7442
7443 return TRUE;
7444 }
7445
7446
7447 /**
7448 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
7449 */
7450 static boolean
7451 emit_slt(struct svga_shader_emitter_v10 *emit,
7452 const struct tgsi_full_instruction *inst)
7453 {
7454 /* dst = SLT(s0, s1):
7455 * dst = s0 < s1 ? 1.0 : 0.0 (per component)
7456 * Translates into:
7457 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
7458 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7459 */
7460 unsigned tmp = get_temp_index(emit);
7461 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7462 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7463 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7464 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7465
7466 /* LT tmp, s0, s1 */
7467 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
7468 &inst->Src[1]);
7469
7470 /* MOVC dst, tmp, one, zero */
7471 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7472 &one, &zero);
7473
7474 free_temp_indexes(emit);
7475
7476 return TRUE;
7477 }
7478
7479
7480 /**
7481 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
7482 */
7483 static boolean
7484 emit_sne(struct svga_shader_emitter_v10 *emit,
7485 const struct tgsi_full_instruction *inst)
7486 {
7487 /* dst = SNE(s0, s1):
7488 * dst = s0 != s1 ? 1.0 : 0.0 (per component)
7489 * Translates into:
7490 * NE tmp, s0, s1; tmp = s0 != s1 ? 0xffffffff : 0 (per comp)
7491 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7492 */
7493 unsigned tmp = get_temp_index(emit);
7494 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7495 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7496 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7497 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7498
7499 /* NE tmp, s0, s1 */
7500 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
7501 &inst->Src[1]);
7502
7503 /* MOVC dst, tmp, one, zero */
7504 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7505 &one, &zero);
7506
7507 free_temp_indexes(emit);
7508
7509 return TRUE;
7510 }
7511
7512
7513 /**
7514 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
7515 */
7516 static boolean
7517 emit_ssg(struct svga_shader_emitter_v10 *emit,
7518 const struct tgsi_full_instruction *inst)
7519 {
7520 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
7521 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
7522 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
7523 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
7524 * Translates into:
7525 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp)
7526 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component)
7527 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp)
7528 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component)
7529 */
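   /* Worked example: src = (-3.0, 0.0, 5.0, 2.0)
    *   LT   tmp1 = (~0,  0,  0,  0)   MOVC tmp2 = (-1.0, 0.0, 0.0, 0.0)
    *   LT   tmp1 = ( 0,  0, ~0, ~0)   MOVC dst  = (-1.0, 0.0, 1.0, 1.0)
    */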
7530 struct tgsi_full_src_register zero =
7531 make_immediate_reg_float(emit, 0.0f);
7532 struct tgsi_full_src_register one =
7533 make_immediate_reg_float(emit, 1.0f);
7534 struct tgsi_full_src_register neg_one =
7535 make_immediate_reg_float(emit, -1.0f);
7536
7537 unsigned tmp1 = get_temp_index(emit);
7538 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7539 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7540
7541 unsigned tmp2 = get_temp_index(emit);
7542 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7543 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7544
7545 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
7546 &zero);
7547 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
7548 &neg_one, &zero);
7549 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
7550 &inst->Src[0]);
7551 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
7552 &one, &tmp2_src);
7553
7554 free_temp_indexes(emit);
7555
7556 return TRUE;
7557 }
7558
7559
7560 /**
7561 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
7562 */
7563 static boolean
7564 emit_issg(struct svga_shader_emitter_v10 *emit,
7565 const struct tgsi_full_instruction *inst)
7566 {
7567 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
7568 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
7569 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
7570 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
7571 * Translates into:
7572 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component)
7573 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component)
7574 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component)
7575 */
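   /* Since ILT writes -1 for true and 0 for false, the final subtraction
    * gives -1 - 0 = -1 for negative inputs, 0 - (-1) = 1 for positive
    * inputs and 0 - 0 = 0 otherwise.
    */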
7576 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7577
7578 unsigned tmp1 = get_temp_index(emit);
7579 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7580 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7581
7582 unsigned tmp2 = get_temp_index(emit);
7583 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7584 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7585
7586 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
7587
7588 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
7589 &inst->Src[0], &zero);
7590 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
7591 &zero, &inst->Src[0]);
7592 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
7593 &tmp1_src, &neg_tmp2);
7594
7595 free_temp_indexes(emit);
7596
7597 return TRUE;
7598 }
7599
7600
7601 /**
7602 * Emit a comparison instruction. The dest register will get
7603 * 0 or ~0 values depending on the outcome of comparing src0 to src1.
7604 */
7605 static void
7606 emit_comparison(struct svga_shader_emitter_v10 *emit,
7607 SVGA3dCmpFunc func,
7608 const struct tgsi_full_dst_register *dst,
7609 const struct tgsi_full_src_register *src0,
7610 const struct tgsi_full_src_register *src1)
7611 {
7612 struct tgsi_full_src_register immediate;
7613 VGPU10OpcodeToken0 opcode0;
7614 boolean swapSrc = FALSE;
7615
7616 /* Sanity checks for svga vs. gallium enums */
7617 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
7618 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
7619
7620 opcode0.value = 0;
7621
7622 switch (func) {
7623 case SVGA3D_CMP_NEVER:
7624 immediate = make_immediate_reg_int(emit, 0);
7625 /* MOV dst, {0} */
7626 begin_emit_instruction(emit);
7627 emit_dword(emit, VGPU10_OPCODE_MOV);
7628 emit_dst_register(emit, dst);
7629 emit_src_register(emit, &immediate);
7630 end_emit_instruction(emit);
7631 return;
7632 case SVGA3D_CMP_ALWAYS:
7633 immediate = make_immediate_reg_int(emit, -1);
7634 /* MOV dst, {-1} */
7635 begin_emit_instruction(emit);
7636 emit_dword(emit, VGPU10_OPCODE_MOV);
7637 emit_dst_register(emit, dst);
7638 emit_src_register(emit, &immediate);
7639 end_emit_instruction(emit);
7640 return;
7641 case SVGA3D_CMP_LESS:
7642 opcode0.opcodeType = VGPU10_OPCODE_LT;
7643 break;
7644 case SVGA3D_CMP_EQUAL:
7645 opcode0.opcodeType = VGPU10_OPCODE_EQ;
7646 break;
7647 case SVGA3D_CMP_LESSEQUAL:
7648 opcode0.opcodeType = VGPU10_OPCODE_GE;
7649 swapSrc = TRUE;
7650 break;
7651 case SVGA3D_CMP_GREATER:
7652 opcode0.opcodeType = VGPU10_OPCODE_LT;
7653 swapSrc = TRUE;
7654 break;
7655 case SVGA3D_CMP_NOTEQUAL:
7656 opcode0.opcodeType = VGPU10_OPCODE_NE;
7657 break;
7658 case SVGA3D_CMP_GREATEREQUAL:
7659 opcode0.opcodeType = VGPU10_OPCODE_GE;
7660 break;
7661 default:
7662 assert(!"Unexpected comparison mode");
7663 opcode0.opcodeType = VGPU10_OPCODE_EQ;
7664 }
7665
7666 begin_emit_instruction(emit);
7667 emit_dword(emit, opcode0.value);
7668 emit_dst_register(emit, dst);
7669 if (swapSrc) {
7670 emit_src_register(emit, src1);
7671 emit_src_register(emit, src0);
7672 }
7673 else {
7674 emit_src_register(emit, src0);
7675 emit_src_register(emit, src1);
7676 }
7677 end_emit_instruction(emit);
7678 }
7679
7680
7681 /**
7682 * Get texel/address offsets for a texture instruction.
7683 */
7684 static void
7685 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
7686 const struct tgsi_full_instruction *inst, int offsets[3])
7687 {
7688 if (inst->Texture.NumOffsets == 1) {
7689 /* According to the OpenGL Shading Language spec, the offsets are only
7690 * fetched from a previously-declared immediate/literal.
7691 */
7692 const struct tgsi_texture_offset *off = inst->TexOffsets;
7693 const unsigned index = off[0].Index;
7694 const unsigned swizzleX = off[0].SwizzleX;
7695 const unsigned swizzleY = off[0].SwizzleY;
7696 const unsigned swizzleZ = off[0].SwizzleZ;
7697 const union tgsi_immediate_data *imm = emit->immediates[index];
7698
7699 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
7700
7701 offsets[0] = imm[swizzleX].Int;
7702 offsets[1] = imm[swizzleY].Int;
7703 offsets[2] = imm[swizzleZ].Int;
7704 }
7705 else {
7706 offsets[0] = offsets[1] = offsets[2] = 0;
7707 }
7708 }
7709
7710
7711 /**
7712 * Set up the coordinate register for texture sampling.
7713 * When we're sampling from a RECT texture we have to scale the
7714 * unnormalized coordinate to a normalized coordinate.
7715 * We do that by multiplying the coordinate by an "extra" constant.
7716 * An alternative would be to use the RESINFO instruction to query the
7717 * texture's size.
7718 */
7719 static struct tgsi_full_src_register
7720 setup_texcoord(struct svga_shader_emitter_v10 *emit,
7721 unsigned unit,
7722 const struct tgsi_full_src_register *coord)
7723 {
7724 if (emit->key.tex[unit].sampler_view && emit->key.tex[unit].unnormalized) {
7725 unsigned scale_index = emit->texcoord_scale_index[unit];
7726 unsigned tmp = get_temp_index(emit);
7727 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7728 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7729 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
7730
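      /* The constant at scale_index presumably holds the reciprocal of the
       * texture dimensions (e.g. 1/width, 1/height), so the MUL below maps
       * unnormalized RECT coordinates into the normalized [0, 1] range.
       */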
7731 if (emit->key.tex[unit].texel_bias) {
7732 /* To fix a texture coordinate rounding issue, a 0.0001 offset is
7733 * added. This fixes the piglit test fbo-blit-scaled-linear. */
7734 struct tgsi_full_src_register offset =
7735 make_immediate_reg_float(emit, 0.0001f);
7736
7737 /* ADD tmp, coord, offset */
7738 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
7739 coord, &offset);
7740 /* MUL tmp, tmp, scale */
7741 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7742 &tmp_src, &scale_src);
7743 }
7744 else {
7745 /* MUL tmp, coord, const[] */
7746 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7747 coord, &scale_src);
7748 }
7749 return tmp_src;
7750 }
7751 else {
7752 /* use texcoord as-is */
7753 return *coord;
7754 }
7755 }
7756
7757
7758 /**
7759 * For SAMPLE_C instructions, emit the extra src register which indicates
7760 * the reference/comparison value.
7761 */
7762 static void
7763 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
7764 enum tgsi_texture_type target,
7765 const struct tgsi_full_src_register *coord)
7766 {
7767 struct tgsi_full_src_register coord_src_ref;
7768 int component;
7769
7770 assert(tgsi_is_shadow_target(target));
7771
7772 component = tgsi_util_get_shadow_ref_src_index(target) % 4;
7773 assert(component >= 0);
7774
7775 coord_src_ref = scalar_src(coord, component);
7776
7777 emit_src_register(emit, &coord_src_ref);
7778 }
7779
7780
7781 /**
7782 * Info for implementing texture swizzles.
7783 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
7784 * functions use this to encapsulate the extra steps needed to perform
7785 * a texture swizzle, or shadow/depth comparisons.
7786 * The shadow/depth comparison is only done here for the cases where
7787 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
7788 */
7789 struct tex_swizzle_info
7790 {
7791 boolean swizzled;
7792 boolean shadow_compare;
7793 unsigned unit;
7794 enum tgsi_texture_type texture_target; /**< TGSI_TEXTURE_x */
7795 struct tgsi_full_src_register tmp_src;
7796 struct tgsi_full_dst_register tmp_dst;
7797 const struct tgsi_full_dst_register *inst_dst;
7798 const struct tgsi_full_src_register *coord_src;
7799 };
7800
7801
7802 /**
7803 * Do setup for handling texture swizzles or shadow compares.
7804 * \param unit the texture unit
7805 * \param inst the TGSI texture instruction
7806 * \param shadow_compare do shadow/depth comparison?
7807 * \param swz returns the swizzle info
7808 */
7809 static void
7810 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7811 unsigned unit,
7812 const struct tgsi_full_instruction *inst,
7813 boolean shadow_compare,
7814 struct tex_swizzle_info *swz)
7815 {
7816 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
7817 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
7818 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
7819 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
7820
7821 swz->shadow_compare = shadow_compare;
7822 swz->texture_target = inst->Texture.Texture;
7823
7824 if (swz->swizzled || shadow_compare) {
7825 /* Allocate temp register for the result of the SAMPLE instruction
7826 * and the source of the MOV/compare/swizzle instructions.
7827 */
7828 unsigned tmp = get_temp_index(emit);
7829 swz->tmp_src = make_src_temp_reg(tmp);
7830 swz->tmp_dst = make_dst_temp_reg(tmp);
7831
7832 swz->unit = unit;
7833 }
7834 swz->inst_dst = &inst->Dst[0];
7835 swz->coord_src = &inst->Src[0];
7836
7837 emit->fs.shadow_compare_units |= shadow_compare << unit;
7838 }
7839
7840
7841 /**
7842 * Returns the register to put the SAMPLE instruction results into.
7843 * This will either be the original instruction dst reg (if no swizzle
7844 * and no shadow comparison) or a temporary reg otherwise.
7845 */
7846 static const struct tgsi_full_dst_register *
7847 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
7848 {
7849 return (swz->swizzled || swz->shadow_compare)
7850 ? &swz->tmp_dst : swz->inst_dst;
7851 }
7852
7853
7854 /**
7855 * This emits the MOV instruction that actually implements a texture swizzle
7856 * and/or shadow comparison.
7857 */
7858 static void
7859 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7860 const struct tex_swizzle_info *swz)
7861 {
7862 if (swz->shadow_compare) {
7863 /* Emit extra instructions to compare the fetched texel value against
7864 * a texture coordinate component. The result of the comparison
7865 * is 0.0 or 1.0.
7866 */
7867 struct tgsi_full_src_register coord_src;
7868 struct tgsi_full_src_register texel_src =
7869 scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
7870 struct tgsi_full_src_register one =
7871 make_immediate_reg_float(emit, 1.0f);
7872 /* convert gallium comparison func to SVGA comparison func */
7873 SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
7874
7875 int component =
7876 tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
7877 assert(component >= 0);
7878 coord_src = scalar_src(swz->coord_src, component);
7879
7880 /* COMPARE tmp, coord, texel */
7881 emit_comparison(emit, compare_func,
7882 &swz->tmp_dst, &coord_src, &texel_src);
7883
7884 /* AND dest, tmp, {1.0} */
7885 begin_emit_instruction(emit);
7886 emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
7887 if (swz->swizzled) {
7888 emit_dst_register(emit, &swz->tmp_dst);
7889 }
7890 else {
7891 emit_dst_register(emit, swz->inst_dst);
7892 }
7893 emit_src_register(emit, &swz->tmp_src);
7894 emit_src_register(emit, &one);
7895 end_emit_instruction(emit);
7896 }
7897
7898 if (swz->swizzled) {
7899 unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
7900 unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
7901 unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
7902 unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
7903 unsigned writemask_0 = 0, writemask_1 = 0;
7904 boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
7905
7906 /* Swizzle w/out zero/one terms */
7907 struct tgsi_full_src_register src_swizzled =
7908 swizzle_src(&swz->tmp_src,
7909 swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
7910 swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
7911 swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
7912 swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
7913
7914 /* MOV dst, color(tmp).<swizzle> */
7915 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
7916 swz->inst_dst, &src_swizzled);
7917
7918 /* handle swizzle zero terms */
7919 writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
7920 ((swz_g == PIPE_SWIZZLE_0) << 1) |
7921 ((swz_b == PIPE_SWIZZLE_0) << 2) |
7922 ((swz_a == PIPE_SWIZZLE_0) << 3));
7923 writemask_0 &= swz->inst_dst->Register.WriteMask;
7924
7925 if (writemask_0) {
7926 struct tgsi_full_src_register zero = int_tex ?
7927 make_immediate_reg_int(emit, 0) :
7928 make_immediate_reg_float(emit, 0.0f);
7929 struct tgsi_full_dst_register dst =
7930 writemask_dst(swz->inst_dst, writemask_0);
7931
7932 /* MOV dst.writemask_0, {0,0,0,0} */
7933 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
7934 }
7935
7936 /* handle swizzle one terms */
7937 writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
7938 ((swz_g == PIPE_SWIZZLE_1) << 1) |
7939 ((swz_b == PIPE_SWIZZLE_1) << 2) |
7940 ((swz_a == PIPE_SWIZZLE_1) << 3));
7941 writemask_1 &= swz->inst_dst->Register.WriteMask;
7942
7943 if (writemask_1) {
7944 struct tgsi_full_src_register one = int_tex ?
7945 make_immediate_reg_int(emit, 1) :
7946 make_immediate_reg_float(emit, 1.0f);
7947 struct tgsi_full_dst_register dst =
7948 writemask_dst(swz->inst_dst, writemask_1);
7949
7950 /* MOV dst.writemask_1, {1,1,1,1} */
7951 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
7952 }
7953 }
7954 }
7955
7956
7957 /**
7958 * Emit code for TGSI_OPCODE_SAMPLE instruction.
7959 */
7960 static boolean
7961 emit_sample(struct svga_shader_emitter_v10 *emit,
7962 const struct tgsi_full_instruction *inst)
7963 {
7964 const unsigned resource_unit = inst->Src[1].Register.Index;
7965 const unsigned sampler_unit = inst->Src[2].Register.Index;
7966 struct tgsi_full_src_register coord;
7967 int offsets[3];
7968 struct tex_swizzle_info swz_info;
7969
7970 begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
7971
7972 get_texel_offsets(emit, inst, offsets);
7973
7974 coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
7975
7976 /* SAMPLE dst, coord(s0), resource, sampler */
7977 begin_emit_instruction(emit);
7978
7979 /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
7980 * with LOD=0. But our virtual GPU accepts this as-is.
7981 */
7982 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
7983 inst->Instruction.Saturate, offsets);
7984 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
7985 emit_src_register(emit, &coord);
7986 emit_resource_register(emit, resource_unit);
7987 emit_sampler_register(emit, sampler_unit);
7988 end_emit_instruction(emit);
7989
7990 end_tex_swizzle(emit, &swz_info);
7991
7992 free_temp_indexes(emit);
7993
7994 return TRUE;
7995 }
7996
7997
7998 /**
7999 * Check if a texture instruction is valid.
8000 * An example of an invalid texture instruction is doing shadow comparison
8001 * with an integer-valued texture.
8002 * If we detect an invalid texture instruction, we replace it with:
8003 * MOV dst, {1,1,1,1};
8004 * \return TRUE if valid, FALSE if invalid.
8005 */
8006 static boolean
8007 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
8008 const struct tgsi_full_instruction *inst)
8009 {
8010 const unsigned unit = inst->Src[1].Register.Index;
8011 const enum tgsi_texture_type target = inst->Texture.Texture;
8012 boolean valid = TRUE;
8013
8014 if (tgsi_is_shadow_target(target) &&
8015 is_integer_type(emit->sampler_return_type[unit])) {
8016 debug_printf("Invalid SAMPLE_C with an integer texture!\n");
8017 valid = FALSE;
8018 }
8019 /* XXX might check for other conditions in the future here */
8020
8021 if (!valid) {
8022 /* emit a MOV dst, {1,1,1,1} instruction. */
8023 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8024 begin_emit_instruction(emit);
8025 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
8026 emit_dst_register(emit, &inst->Dst[0]);
8027 emit_src_register(emit, &one);
8028 end_emit_instruction(emit);
8029 }
8030
8031 return valid;
8032 }
8033
8034
8035 /**
8036 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
8037 */
8038 static boolean
8039 emit_tex(struct svga_shader_emitter_v10 *emit,
8040 const struct tgsi_full_instruction *inst)
8041 {
8042 const uint unit = inst->Src[1].Register.Index;
8043 const enum tgsi_texture_type target = inst->Texture.Texture;
8044 VGPU10_OPCODE_TYPE opcode;
8045 struct tgsi_full_src_register coord;
8046 int offsets[3];
8047 struct tex_swizzle_info swz_info;
8048
8049 /* check that the sampler returns a float */
8050 if (!is_valid_tex_instruction(emit, inst))
8051 return TRUE;
8052
8053 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8054
8055 get_texel_offsets(emit, inst, offsets);
8056
8057 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8058
8059 /* SAMPLE dst, coord(s0), resource, sampler */
8060 begin_emit_instruction(emit);
8061
8062 if (tgsi_is_shadow_target(target))
8063 opcode = VGPU10_OPCODE_SAMPLE_C;
8064 else
8065 opcode = VGPU10_OPCODE_SAMPLE;
8066
8067 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8068 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8069 emit_src_register(emit, &coord);
8070 emit_resource_register(emit, unit);
8071 emit_sampler_register(emit, unit);
8072 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8073 emit_tex_compare_refcoord(emit, target, &coord);
8074 }
8075 end_emit_instruction(emit);
8076
8077 end_tex_swizzle(emit, &swz_info);
8078
8079 free_temp_indexes(emit);
8080
8081 return TRUE;
8082 }
8083
8084 /**
8085 * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8086 */
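/* Illustrative note: a texture gather returns, for one selected
 * component, that component of each of the four texels that would be
 * used for bilinear filtering.  E.g. GLSL textureGather(s, coord, 1)
 * packs the .y component of the 2x2 footprint texels into dst.xyzw.
 */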
8087 static boolean
8088 emit_tg4(struct svga_shader_emitter_v10 *emit,
8089 const struct tgsi_full_instruction *inst)
8090 {
8091 const uint unit = inst->Src[2].Register.Index;
8092 struct tgsi_full_src_register src;
8093 struct tgsi_full_src_register offset_src, sampler, ref;
8094 int offsets[3];
8095
8096 /* check that the sampler returns a float */
8097 if (!is_valid_tex_instruction(emit, inst))
8098 return TRUE;
8099
8100 if (emit->version >= 50) {
8101 unsigned target = inst->Texture.Texture;
8102 int index = inst->Src[1].Register.Index;
8103 const union tgsi_immediate_data *imm = emit->immediates[index];
8104 int select_comp = imm[inst->Src[1].Register.SwizzleX].Int;
8105 unsigned select_swizzle = PIPE_SWIZZLE_X;
8106
8107 if (!tgsi_is_shadow_target(target)) {
8108 switch (select_comp) {
8109 case 0:
8110 select_swizzle = emit->key.tex[unit].swizzle_r;
8111 break;
8112 case 1:
8113 select_swizzle = emit->key.tex[unit].swizzle_g;
8114 break;
8115 case 2:
8116 select_swizzle = emit->key.tex[unit].swizzle_b;
8117 break;
8118 case 3:
8119 select_swizzle = emit->key.tex[unit].swizzle_a;
8120 break;
8121 default:
8122 assert(!"Unexpected component in texture gather swizzle");
8123 }
8124 }
8125 else {
8126 select_swizzle = emit->key.tex[unit].swizzle_r;
8127 }
8128
8129 if (select_swizzle == PIPE_SWIZZLE_1) {
8130 src = make_immediate_reg_float(emit, 1.0);
8131 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8132 return TRUE;
8133 }
8134 else if (select_swizzle == PIPE_SWIZZLE_0) {
8135 src = make_immediate_reg_float(emit, 0.0);
8136 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8137 return TRUE;
8138 }
8139
8140 src = setup_texcoord(emit, unit, &inst->Src[0]);
8141
8142 /* GATHER4 dst, coord, resource, sampler */
8143 /* GATHER4_C dst, coord, resource, sampler, ref */
8144 /* GATHER4_PO dst, coord, offset, resource, sampler */
8145 /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */
8146 begin_emit_instruction(emit);
8147 if (inst->Texture.NumOffsets == 1) {
8148 if (tgsi_is_shadow_target(target)) {
8149 emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
8150 inst->Instruction.Saturate);
8151 }
8152 else {
8153 emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
8154 inst->Instruction.Saturate);
8155 }
8156 }
8157 else {
8158 if (tgsi_is_shadow_target(target)) {
8159 emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
8160 inst->Instruction.Saturate);
8161 }
8162 else {
8163 emit_opcode(emit, VGPU10_OPCODE_GATHER4,
8164 inst->Instruction.Saturate);
8165 }
8166 }
8167
8168 emit_dst_register(emit, &inst->Dst[0]);
8169 emit_src_register(emit, &src);
8170 if (inst->Texture.NumOffsets == 1) {
8171 /* offset */
8172 offset_src = make_src_reg(inst->TexOffsets[0].File,
8173 inst->TexOffsets[0].Index);
8174 offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
8175 inst->TexOffsets[0].SwizzleY,
8176 inst->TexOffsets[0].SwizzleZ,
8177 TGSI_SWIZZLE_W);
8178 emit_src_register(emit, &offset_src);
8179 }
8180
8181 /* resource */
8182 emit_resource_register(emit, unit);
8183
8184 /* sampler */
8185 sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8186 sampler.Register.SwizzleX =
8187 sampler.Register.SwizzleY =
8188 sampler.Register.SwizzleZ =
8189 sampler.Register.SwizzleW = select_swizzle;
8190 emit_src_register(emit, &sampler);
8191
8192 if (tgsi_is_shadow_target(target)) {
8193 /* ref */
8194 if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8195 ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8196 emit_tex_compare_refcoord(emit, target, &ref);
8197 }
8198 else {
8199 emit_tex_compare_refcoord(emit, target, &src);
8200 }
8201 }
8202
8203 end_emit_instruction(emit);
8204 free_temp_indexes(emit);
8205 }
8206 else {
8207 /* Only a single channel is supported in SM4_1 and we report
8208 * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
8209 * Only the 0th component will be gathered.
8210 */
8211 switch (emit->key.tex[unit].swizzle_r) {
8212 case PIPE_SWIZZLE_X:
8213 get_texel_offsets(emit, inst, offsets);
8214 src = setup_texcoord(emit, unit, &inst->Src[0]);
8215
8216 /* Gather dst, coord, resource, sampler */
8217 begin_emit_instruction(emit);
8218 emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
8219 inst->Instruction.Saturate, offsets);
8220 emit_dst_register(emit, &inst->Dst[0]);
8221 emit_src_register(emit, &src);
8222 emit_resource_register(emit, unit);
8223
8224 /* sampler */
8225 sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8226 sampler.Register.SwizzleX =
8227 sampler.Register.SwizzleY =
8228 sampler.Register.SwizzleZ =
8229 sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
8230 emit_src_register(emit, &sampler);
8231
8232 end_emit_instruction(emit);
8233 break;
8234 case PIPE_SWIZZLE_W:
8235 case PIPE_SWIZZLE_1:
8236 src = make_immediate_reg_float(emit, 1.0);
8237 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8238 break;
8239 case PIPE_SWIZZLE_Y:
8240 case PIPE_SWIZZLE_Z:
8241 case PIPE_SWIZZLE_0:
8242 default:
8243 src = make_immediate_reg_float(emit, 0.0);
8244 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8245 break;
8246 }
8247 }
8248
8249 return TRUE;
8250 }
8251
8252
8253
8254 /**
8255 * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
8256 */
8257 static boolean
8258 emit_tex2(struct svga_shader_emitter_v10 *emit,
8259 const struct tgsi_full_instruction *inst)
8260 {
8261 const uint unit = inst->Src[2].Register.Index;
8262 unsigned target = inst->Texture.Texture;
8263 struct tgsi_full_src_register coord, ref;
8264 int offsets[3];
8265 struct tex_swizzle_info swz_info;
8266
8267 /* check that the sampler returns a float */
8268 if (!is_valid_tex_instruction(emit, inst))
8269 return TRUE;
8270
8271 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8272
8273 get_texel_offsets(emit, inst, offsets);
8274
8275 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8276 ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8277
8278 /* SAMPLE_C dst, coord, resource, sampler, ref */
8279 begin_emit_instruction(emit);
8280 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C,
8281 inst->Instruction.Saturate, offsets);
8282 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8283 emit_src_register(emit, &coord);
8284 emit_resource_register(emit, unit);
8285 emit_sampler_register(emit, unit);
8286 emit_tex_compare_refcoord(emit, target, &ref);
8287 end_emit_instruction(emit);
8288
8289 end_tex_swizzle(emit, &swz_info);
8290
8291 free_temp_indexes(emit);
8292
8293 return TRUE;
8294 }
8295
8296
8297 /**
8298 * Emit code for TGSI_OPCODE_TXP (projective texture)
8299 */
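/* Illustrative note: projective texturing divides the coordinate by its
 * own .w before sampling; e.g. for a 2D target, GLSL textureProj(s, p)
 * behaves like texture(s, p.xy / p.w).  The DIV + SAMPLE sequence below
 * implements exactly that.
 */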
8300 static boolean
8301 emit_txp(struct svga_shader_emitter_v10 *emit,
8302 const struct tgsi_full_instruction *inst)
8303 {
8304 const uint unit = inst->Src[1].Register.Index;
8305 const enum tgsi_texture_type target = inst->Texture.Texture;
8306 VGPU10_OPCODE_TYPE opcode;
8307 int offsets[3];
8308 unsigned tmp = get_temp_index(emit);
8309 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8310 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8311 struct tgsi_full_src_register src0_wwww =
8312 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8313 struct tgsi_full_src_register coord;
8314 struct tex_swizzle_info swz_info;
8315
8316 /* check that the sampler returns a float */
8317 if (!is_valid_tex_instruction(emit, inst))
8318 return TRUE;
8319
8320 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8321
8322 get_texel_offsets(emit, inst, offsets);
8323
8324 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8325
8326 /* DIV tmp, coord, coord.wwww */
8327 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
8328 &coord, &src0_wwww);
8329
8330 /* SAMPLE dst, coord(tmp), resource, sampler */
8331 begin_emit_instruction(emit);
8332
8333 if (tgsi_is_shadow_target(target))
8334 /* NOTE: for non-fragment shaders, we should use
8335 * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
8336 */
8337 opcode = VGPU10_OPCODE_SAMPLE_C;
8338 else
8339 opcode = VGPU10_OPCODE_SAMPLE;
8340
8341 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8342 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8343 emit_src_register(emit, &tmp_src); /* projected coord */
8344 emit_resource_register(emit, unit);
8345 emit_sampler_register(emit, unit);
8346 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8347 emit_tex_compare_refcoord(emit, target, &tmp_src);
8348 }
8349 end_emit_instruction(emit);
8350
8351 end_tex_swizzle(emit, &swz_info);
8352
8353 free_temp_indexes(emit);
8354
8355 return TRUE;
8356 }
8357
8358
8359 /**
8360 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
8361 */
8362 static boolean
8363 emit_txd(struct svga_shader_emitter_v10 *emit,
8364 const struct tgsi_full_instruction *inst)
8365 {
8366 const uint unit = inst->Src[3].Register.Index;
8367 const enum tgsi_texture_type target = inst->Texture.Texture;
8368 int offsets[3];
8369 struct tgsi_full_src_register coord;
8370 struct tex_swizzle_info swz_info;
8371
8372 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8373 &swz_info);
8374
8375 get_texel_offsets(emit, inst, offsets);
8376
8377 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8378
8379 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
8380 begin_emit_instruction(emit);
8381 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
8382 inst->Instruction.Saturate, offsets);
8383 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8384 emit_src_register(emit, &coord);
8385 emit_resource_register(emit, unit);
8386 emit_sampler_register(emit, unit);
8387 emit_src_register(emit, &inst->Src[1]); /* Xderiv */
8388 emit_src_register(emit, &inst->Src[2]); /* Yderiv */
8389 end_emit_instruction(emit);
8390
8391 end_tex_swizzle(emit, &swz_info);
8392
8393 free_temp_indexes(emit);
8394
8395 return TRUE;
8396 }
8397
8398
8399 /**
8400 * Emit code for TGSI_OPCODE_TXF (texel fetch)
8401 */
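/* Illustrative note: TXF takes unnormalized integer texel coordinates.
 * In the non-MSAA branch below the mipmap level rides in coord.w (LD);
 * in the MSAA branch coord.w is instead used as the sample index (LD_MS).
 */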
8402 static boolean
8403 emit_txf(struct svga_shader_emitter_v10 *emit,
8404 const struct tgsi_full_instruction *inst)
8405 {
8406 const uint unit = inst->Src[1].Register.Index;
8407 const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture)
8408 && emit->key.tex[unit].num_samples > 1;
8409 int offsets[3];
8410 struct tex_swizzle_info swz_info;
8411
8412 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8413
8414 get_texel_offsets(emit, inst, offsets);
8415
8416 if (msaa) {
8417 assert(emit->key.tex[unit].num_samples > 1);
8418
8419 /* Fetch one sample from an MSAA texture */
8420 struct tgsi_full_src_register sampleIndex =
8421 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8422 /* LD_MS dst, coord(s0), resource, sampleIndex */
8423 begin_emit_instruction(emit);
8424 emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
8425 inst->Instruction.Saturate, offsets);
8426 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8427 emit_src_register(emit, &inst->Src[0]);
8428 emit_resource_register(emit, unit);
8429 emit_src_register(emit, &sampleIndex);
8430 end_emit_instruction(emit);
8431 }
8432 else {
8433 /* Fetch one texel specified by integer coordinate */
8434 /* LD dst, coord(s0), resource */
8435 begin_emit_instruction(emit);
8436 emit_sample_opcode(emit, VGPU10_OPCODE_LD,
8437 inst->Instruction.Saturate, offsets);
8438 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8439 emit_src_register(emit, &inst->Src[0]);
8440 emit_resource_register(emit, unit);
8441 end_emit_instruction(emit);
8442 }
8443
8444 end_tex_swizzle(emit, &swz_info);
8445
8446 free_temp_indexes(emit);
8447
8448 return TRUE;
8449 }
8450
8451
8452 /**
8453 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
8454 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
8455 */
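/* Illustrative note: TXL supplies an explicit LOD and maps to SAMPLE_L,
 * while TXB supplies an LOD bias and maps to SAMPLE_B.  The LOD/bias
 * value is read from coord.w for TXL/TXB and from src1.x for TXB2,
 * as selected at the top of the function.
 */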
8456 static boolean
8457 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
8458 const struct tgsi_full_instruction *inst)
8459 {
8460 const enum tgsi_texture_type target = inst->Texture.Texture;
8461 VGPU10_OPCODE_TYPE opcode;
8462 unsigned unit;
8463 int offsets[3];
8464 struct tgsi_full_src_register coord, lod_bias;
8465 struct tex_swizzle_info swz_info;
8466
8467 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
8468 inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
8469 inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
8470
8471 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
8472 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8473 unit = inst->Src[2].Register.Index;
8474 }
8475 else {
8476 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8477 unit = inst->Src[1].Register.Index;
8478 }
8479
8480 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8481 &swz_info);
8482
8483 get_texel_offsets(emit, inst, offsets);
8484
8485 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8486
8487 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
8488 begin_emit_instruction(emit);
8489 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
8490 opcode = VGPU10_OPCODE_SAMPLE_L;
8491 }
8492 else {
8493 opcode = VGPU10_OPCODE_SAMPLE_B;
8494 }
8495 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8496 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8497 emit_src_register(emit, &coord);
8498 emit_resource_register(emit, unit);
8499 emit_sampler_register(emit, unit);
8500 emit_src_register(emit, &lod_bias);
8501 end_emit_instruction(emit);
8502
8503 end_tex_swizzle(emit, &swz_info);
8504
8505 free_temp_indexes(emit);
8506
8507 return TRUE;
8508 }
8509
8510
8511 /**
8512 * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
8513 */
8514 static boolean
8515 emit_txl2(struct svga_shader_emitter_v10 *emit,
8516 const struct tgsi_full_instruction *inst)
8517 {
8518 unsigned target = inst->Texture.Texture;
8519 unsigned opcode, unit;
8520 int offsets[3];
8521 struct tgsi_full_src_register coord, lod;
8522 struct tex_swizzle_info swz_info;
8523
8524 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
8525
8526 lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8527 unit = inst->Src[2].Register.Index;
8528
8529 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8530 &swz_info);
8531
8532 get_texel_offsets(emit, inst, offsets);
8533
8534 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8535
8536 /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
8537 begin_emit_instruction(emit);
8538 opcode = VGPU10_OPCODE_SAMPLE_L;
8539 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8540 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8541 emit_src_register(emit, &coord);
8542 emit_resource_register(emit, unit);
8543 emit_sampler_register(emit, unit);
8544 emit_src_register(emit, &lod);
8545 end_emit_instruction(emit);
8546
8547 end_tex_swizzle(emit, &swz_info);
8548
8549 free_temp_indexes(emit);
8550
8551 return TRUE;
8552 }
8553
8554
8555 /**
8556 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
8557 */
8558 static boolean
8559 emit_txq(struct svga_shader_emitter_v10 *emit,
8560 const struct tgsi_full_instruction *inst)
8561 {
8562 const uint unit = inst->Src[1].Register.Index;
8563
8564 if (emit->key.tex[unit].target == PIPE_BUFFER) {
8565 /* RESINFO does not support querying texture buffers, so we instead
8566 * store texture buffer sizes in shader constants and copy the size
8567 * from the constant to implement TXQ:
8568 * MOV dst, const[texture_buffer_size_index[unit]]
8569 */
8570 struct tgsi_full_src_register size_src =
8571 make_src_const_reg(emit->texture_buffer_size_index[unit]);
8572 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
8573 } else {
8574 /* RESINFO dst, srcMipLevel, resource */
8575 begin_emit_instruction(emit);
8576 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
8577 emit_dst_register(emit, &inst->Dst[0]);
8578 emit_src_register(emit, &inst->Src[0]);
8579 emit_resource_register(emit, unit);
8580 end_emit_instruction(emit);
8581 }
8582
8583 free_temp_indexes(emit);
8584
8585 return TRUE;
8586 }
8587
8588
8589 /**
8590 * Does this opcode produce a double-precision result?
8591 * XXX perhaps move this to a TGSI utility.
8592 */
8593 static bool
8594 opcode_has_dbl_dst(unsigned opcode)
8595 {
8596 switch (opcode) {
8597 case TGSI_OPCODE_F2D:
8598 case TGSI_OPCODE_DABS:
8599 case TGSI_OPCODE_DADD:
8600 case TGSI_OPCODE_DFRAC:
8601 case TGSI_OPCODE_DMAX:
8602 case TGSI_OPCODE_DMIN:
8603 case TGSI_OPCODE_DMUL:
8604 case TGSI_OPCODE_DNEG:
8605 case TGSI_OPCODE_I2D:
8606 case TGSI_OPCODE_U2D:
8607 // XXX more TBD
8608 return true;
8609 default:
8610 return false;
8611 }
8612 }
8613
8614
8615 /**
8616 * Does this opcode use double-precision source registers?
8617 */
8618 static bool
8619 opcode_has_dbl_src(unsigned opcode)
8620 {
8621 switch (opcode) {
8622 case TGSI_OPCODE_D2F:
8623 case TGSI_OPCODE_DABS:
8624 case TGSI_OPCODE_DADD:
8625 case TGSI_OPCODE_DFRAC:
8626 case TGSI_OPCODE_DMAX:
8627 case TGSI_OPCODE_DMIN:
8628 case TGSI_OPCODE_DMUL:
8629 case TGSI_OPCODE_DNEG:
8630 case TGSI_OPCODE_D2I:
8631 case TGSI_OPCODE_D2U:
8632 // XXX more TBD
8633 return true;
8634 default:
8635 return false;
8636 }
8637 }
8638
8639
8640 /**
8641 * Check that the swizzle for reading from a double-precision register
8642 * is valid.
8643 */
8644 static void
8645 check_double_src_swizzle(const struct tgsi_full_src_register *reg)
8646 {
8647 assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
8648 reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
8649 (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
8650 reg->Register.SwizzleY == PIPE_SWIZZLE_W));
8651
8652 assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
8653 reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
8654 (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
8655 reg->Register.SwizzleW == PIPE_SWIZZLE_W));
8656 }
8657
8658
8659 /**
8660 * Check that the writemask for a double-precision instruction is valid.
8661 */
8662 static void
8663 check_double_dst_writemask(const struct tgsi_full_instruction *inst)
8664 {
8665 ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
8666
8667 switch (inst->Instruction.Opcode) {
8668 case TGSI_OPCODE_DABS:
8669 case TGSI_OPCODE_DADD:
8670 case TGSI_OPCODE_DFRAC:
8671 case TGSI_OPCODE_DNEG:
8672 case TGSI_OPCODE_DMAD:
8673 case TGSI_OPCODE_DMAX:
8674 case TGSI_OPCODE_DMIN:
8675 case TGSI_OPCODE_DMUL:
8676 case TGSI_OPCODE_DRCP:
8677 case TGSI_OPCODE_DSQRT:
8678 case TGSI_OPCODE_F2D:
8679 assert(writemask == TGSI_WRITEMASK_XYZW ||
8680 writemask == TGSI_WRITEMASK_XY ||
8681 writemask == TGSI_WRITEMASK_ZW);
8682 break;
8683 case TGSI_OPCODE_DSEQ:
8684 case TGSI_OPCODE_DSGE:
8685 case TGSI_OPCODE_DSNE:
8686 case TGSI_OPCODE_DSLT:
8687 case TGSI_OPCODE_D2I:
8688 case TGSI_OPCODE_D2U:
8689 /* Write to 1 or 2 components only */
8690 assert(util_bitcount(writemask) <= 2);
8691 break;
8692 default:
8693 /* XXX this list may be incomplete */
8694 ;
8695 }
8696 }
8697
8698
8699 /**
8700 * Double-precision absolute value.
8701 */
8702 static boolean
8703 emit_dabs(struct svga_shader_emitter_v10 *emit,
8704 const struct tgsi_full_instruction *inst)
8705 {
8706 assert(emit->version >= 50);
8707 check_double_src_swizzle(&inst->Src[0]);
8708 check_double_dst_writemask(inst);
8709
8710 struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]);
8711
8712 /* DMOV dst, |src| */
8713 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
8714
8715 return TRUE;
8716 }
8717
8718
8719 /**
8720 * Double-precision negation
8721 */
8722 static boolean
8723 emit_dneg(struct svga_shader_emitter_v10 *emit,
8724 const struct tgsi_full_instruction *inst)
8725 {
8726 assert(emit->version >= 50);
8727 check_double_src_swizzle(&inst->Src[0]);
8728 check_double_dst_writemask(inst);
8729
8730 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
8731
8732 /* DMOV dst, -src */
8733 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
8734
8735 return TRUE;
8736 }
8737
8738
8739 /**
8740 * SM5 has no DMAD opcode, so we implement it with DMUL followed by DADD.
8741 */
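/* Illustrative note: the expansion uses the identity
 *   mad(a, b, c) = a * b + c
 * so DMAD dst, a, b, c is emitted as DMUL tmp, a, b followed by
 * DADD dst, tmp, c.
 */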
8742 static boolean
8743 emit_dmad(struct svga_shader_emitter_v10 *emit,
8744 const struct tgsi_full_instruction *inst)
8745 {
8746 assert(emit->version >= 50);
8747 check_double_src_swizzle(&inst->Src[0]);
8748 check_double_src_swizzle(&inst->Src[1]);
8749 check_double_src_swizzle(&inst->Src[2]);
8750 check_double_dst_writemask(inst);
8751
8752 unsigned tmp = get_temp_index(emit);
8753 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8754 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8755
8756 /* DMUL tmp, src[0], src[1] */
8757 emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
8758 &tmp_dst, &inst->Src[0], &inst->Src[1], NULL,
8759 FALSE, inst->Instruction.Precise);
8760
8761 /* DADD dst, tmp, src[2] */
8762 emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
8763 &inst->Dst[0], &tmp_src, &inst->Src[2], NULL,
8764 inst->Instruction.Saturate, inst->Instruction.Precise);
8765 free_temp_indexes(emit);
8766
8767 return TRUE;
8768 }
8769
8770
8771 /**
8772 * Double precision reciprocal square root
8773 */
8774 static boolean
8775 emit_drsq(struct svga_shader_emitter_v10 *emit,
8776 const struct tgsi_full_dst_register *dst,
8777 const struct tgsi_full_src_register *src)
8778 {
8779 assert(emit->version >= 50);
8780
8781 VGPU10OpcodeToken0 token0;
8782 begin_emit_instruction(emit);
8783
8784 token0.value = 0;
8785 token0.opcodeType = VGPU10_OPCODE_VMWARE;
8786 token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
8787 emit_dword(emit, token0.value);
8788
8789 emit_dst_register(emit, dst);
8790
8791 check_double_src_swizzle(src);
8792 emit_src_register(emit, src);
8793
8794 end_emit_instruction(emit);
8795
8796 return TRUE;
8797 }
8798
8799
8800 /**
8801 * There is no SM5 opcode for double precision square root.
8802 * It will be implemented with DRSQ.
8803 * dst = src * DRSQ(src)
8804 */
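/* Illustrative note: for x > 0, sqrt(x) = x * (1 / sqrt(x)) = x * rsq(x),
 * which is what the DRSQ + DMUL sequence below computes.  The x == 0
 * case is special-cased (DEQ/DMOVC below) since rsq(0) = INF and
 * 0 * INF would produce NaN instead of the expected 0.
 */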
8805 static boolean
8806 emit_dsqrt(struct svga_shader_emitter_v10 *emit,
8807 const struct tgsi_full_instruction *inst)
8808 {
8809 assert(emit->version >= 50);
8810
8811 check_double_src_swizzle(&inst->Src[0]);
8812
8813 /* temporary register to hold the source */
8814 unsigned tmp = get_temp_index(emit);
8815 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8816 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8817
8818 /* temporary register to hold the DEQ result */
8819 unsigned tmp_cond = get_temp_index(emit);
8820 struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
8821 struct tgsi_full_dst_register tmp_cond_dst_xy =
8822 writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8823 struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
8824 struct tgsi_full_src_register tmp_cond_src_xy =
8825 swizzle_src(&tmp_cond_src,
8826 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
8827 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
8828
8829 /* The reciprocal square root of zero yields INF.
8830 * So if the source is 0, we replace it with 1 in the tmp register.
8831 * Multiplying by the original (zero) source afterwards then yields 0
8832 * in the result.
8833 */
8834
8835 /* tmp = (src == 0) ? 1 : src;
8836 * EQ tmp1, 0, src
8837 * MOVC tmp, tmp1, 1.0, src
8838 */
8839 struct tgsi_full_src_register zero =
8840 make_immediate_reg_double(emit, 0);
8841
8842 struct tgsi_full_src_register one =
8843 make_immediate_reg_double(emit, 1.0);
8844
8845 emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
8846 &zero, &inst->Src[0]);
8847 emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
8848 &tmp_cond_src_xy, &one, &inst->Src[0]);
8849
8850 struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
8851 struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
8852
8853 /* DRSQ tmp_rsq, tmp */
8854 emit_drsq(emit, &tmp_rsq_dst, &tmp_src);
8855
8856 /* DMUL dst, tmp_rsq, src[0] */
8857 emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
8858 &tmp_rsq_src, &inst->Src[0]);
8859
8860 free_temp_indexes(emit);
8861
8862 return TRUE;
8863 }
8864
8865
8866 static boolean
8867 emit_interp_offset(struct svga_shader_emitter_v10 *emit,
8868 const struct tgsi_full_instruction *inst)
8869 {
8870 assert(emit->version >= 50);
8871
8872 /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
8873 * where (0,0) is the center of the pixel. We need to translate that
8874 * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
8875 * We also apparently need to flip the Y axis, hence the -16 scale below.
8876 */
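/* Illustrative note: an offset of 0.25 pixel maps to 0.25 * 16 = 4,
 * i.e. 4/16 of a pixel on the snapped grid, and the Y component is
 * scaled by -16 to account for the flipped Y axis.
 */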
8877 unsigned tmp = get_temp_index(emit);
8878 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8879 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8880 struct tgsi_full_dst_register tmp_dst_xy =
8881 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8882 struct tgsi_full_src_register const16 =
8883 make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);
8884
8885 /* MUL tmp.xy, src1, {16, -16, 0, 0} */
8886 emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
8887 &tmp_dst_xy, &inst->Src[1], &const16);
8888
8889 /* FTOI tmp.xy, tmp */
8890 emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);
8891
8892 /* EVAL_SNAPPED dst, src0, tmp */
8893 emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
8894 &inst->Dst[0], &inst->Src[0], &tmp_src);
8895
8896 free_temp_indexes(emit);
8897
8898 return TRUE;
8899 }
8900
8901
8902 /**
8903 * Emit a simple instruction (like ADD, MUL, MIN, etc).
8904 */
8905 static boolean
8906 emit_simple(struct svga_shader_emitter_v10 *emit,
8907 const struct tgsi_full_instruction *inst)
8908 {
8909 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
8910 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
8911 const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
8912 const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
8913 unsigned i;
8914
8915 if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
8916 emit->current_loop_depth++;
8917 }
8918 else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
8919 emit->current_loop_depth--;
8920 }
8921
8922 begin_emit_instruction(emit);
8923 emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
8924 inst->Instruction.Saturate,
8925 inst->Instruction.Precise);
8926 for (i = 0; i < op->num_dst; i++) {
8927 if (dbl_dst) {
8928 check_double_dst_writemask(inst);
8929 }
8930 emit_dst_register(emit, &inst->Dst[i]);
8931 }
8932 for (i = 0; i < op->num_src; i++) {
8933 if (dbl_src) {
8934 check_double_src_swizzle(&inst->Src[i]);
8935 }
8936 emit_src_register(emit, &inst->Src[i]);
8937 }
8938 end_emit_instruction(emit);
8939
8940 return TRUE;
8941 }
8942
8943
8944 /**
8945 * Emit MSB instruction (like IMSB, UMSB).
8946 *
8947 * GLSL returns the bit index counted from the LSB,
8948 * whereas SM5's firstbit_hi/shi returns the index counted from the MSB.
8949 * To convert the SM5 result to the GLSL convention we
8950 * return (31 - index) whenever the returned index is not -1.
8951 */
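/* Illustrative worked example: for input 0x00008000 (bit 15 set), SM5
 * firstbit_hi counts from the MSB and returns 16, while GLSL findMSB()
 * expects 15; 31 - 16 = 15 recovers the GLSL result.  An all-zero input
 * yields -1 in both conventions, so that case is left unchanged below.
 */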
8952 static boolean
8953 emit_msb(struct svga_shader_emitter_v10 *emit,
8954 const struct tgsi_full_instruction *inst)
8955 {
8956 const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];
8957
8958 assert(index_dst->Register.File != TGSI_FILE_OUTPUT);
8959
8960 struct tgsi_full_src_register index_src =
8961 make_src_reg(index_dst->Register.File, index_dst->Register.Index);
8962 struct tgsi_full_src_register imm31 =
8963 make_immediate_reg_int(emit, 31);
8964 imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
8965 struct tgsi_full_src_register neg_one =
8966 make_immediate_reg_int(emit, -1);
8967 neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
8968 unsigned tmp = get_temp_index(emit);
8969 const struct tgsi_full_dst_register tmp_dst =
8970 make_dst_temp_reg(tmp);
8971 const struct tgsi_full_dst_register tmp_dst_x =
8972 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
8973 const struct tgsi_full_src_register tmp_src_x =
8974 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
8975 int writemask = TGSI_WRITEMASK_X;
8976 int src_swizzle = TGSI_SWIZZLE_X;
8977 int dst_writemask = index_dst->Register.WriteMask;
8978
8979 emit_simple(emit, inst);
8980
8981 /* index conversion from SM5 to GLSL */
8982 while (writemask & dst_writemask) {
8983 struct tgsi_full_src_register index_src_comp =
8984 scalar_src(&index_src, src_swizzle);
8985 struct tgsi_full_dst_register index_dst_comp =
8986 writemask_dst(index_dst, writemask);
8987
8988 /* check if index_src_comp != -1 */
8989 emit_instruction_op2(emit, VGPU10_OPCODE_INE,
8990 &tmp_dst_x, &index_src_comp, &neg_one);
8991
8992 /* if */
8993 emit_if(emit, &tmp_src_x);
8994
8995 index_src_comp = negate_src(&index_src_comp);
8996 /* SUB DST, IMM{31}, DST */
8997 emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
8998 &index_dst_comp, &imm31, &index_src_comp);
8999
9000 /* endif */
9001 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9002
9003 writemask = writemask << 1;
9004 src_swizzle = src_swizzle + 1;
9005 }
9006 free_temp_indexes(emit);
9007 return TRUE;
9008 }
9009
9010
9011 /**
9012 * Emit a BFE instruction (like UBFE, IBFE).
9013 * tgsi representation:
9014 * U/IBFE dst, value, offset, width
9015 * SM5 representation:
9016 * U/IBFE dst, width, offset, value
9017 * Note: SM5 has width & offset range (0-31);
9018 * whereas GLSL has width & offset range (0-32)
9019 */
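/* Illustrative worked example: bitfieldExtract(0xABCD1234, 8, 4)
 * extracts bits 8..11 of the value, i.e. (0xABCD1234 >> 8) & 0xF = 0x2.
 * In SM5 the operand order is reversed: UBFE dst, width(4), offset(8),
 * value.
 */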
9020 static boolean
9021 emit_bfe(struct svga_shader_emitter_v10 *emit,
9022 const struct tgsi_full_instruction *inst)
9023 {
9024 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9025 struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9026 imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9027 struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9028 zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9029
9030 unsigned tmp1 = get_temp_index(emit);
9031 const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9032 const struct tgsi_full_dst_register cond1_dst_x =
9033 writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9034 const struct tgsi_full_src_register cond1_src_x =
9035 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9036
9037 unsigned tmp2 = get_temp_index(emit);
9038 const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9039 const struct tgsi_full_dst_register cond2_dst_x =
9040 writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9041 const struct tgsi_full_src_register cond2_src_x =
9042 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9043
9044 /**
9045 * In SM5, when width = 32 and offset = 0, it returns 0.
9046 * GLSL, on the other hand, expects the value to be copied unchanged to dst.
9047 */
9048
9049 /* cond1 = (width == 32) */
9050 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9051 &cond1_dst_x, &inst->Src[2], &imm32);
9052
9053 /* cond2 = (offset == 0) */
9054 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9055 &cond2_dst_x, &inst->Src[1], &zero);
9056
9057 /* cond2 = cond1 & cond2 */
9058 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
9059 &cond2_src_x,
9060 &cond1_src_x);
9061 /* IF */
9062 emit_if(emit, &cond2_src_x);
9063
9064 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9065 &inst->Src[0]);
9066
9067 /* ELSE */
9068 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9069
9070 /* U/IBFE dst, width, offset, value */
9071 emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
9072 &inst->Src[2], &inst->Src[1], &inst->Src[0]);
9073
9074 /* ENDIF */
9075 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9076
9077 free_temp_indexes(emit);
9078 return TRUE;
9079 }
9080
9081
9082 /**
9083 * Emit BFI instruction
9084 * tgsi representation:
9085 * BFI dst, base, insert, offset, width
9086 * SM5 representation:
9087 * BFI dst, width, offset, insert, base
9088 * Note: SM5 has width & offset range (0-31);
9089 * whereas GLSL has width & offset range (0-32)
9090 */
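/* Illustrative worked example: bitfieldInsert(base = 0xFFFFFFFF,
 * insert = 0x0, offset = 4, width = 8) replaces bits 4..11 of the base
 * and yields 0xFFFFF00F.  As with BFE, the SM5 operand order is
 * reversed: BFI dst, width, offset, insert, base.
 */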
9091 static boolean
9092 emit_bfi(struct svga_shader_emitter_v10 *emit,
9093 const struct tgsi_full_instruction *inst)
9094 {
9095 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9096 struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9097 imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9098
9099 struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9100 zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9101
9102 unsigned tmp1 = get_temp_index(emit);
9103 const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9104 const struct tgsi_full_dst_register cond1_dst_x =
9105 writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9106 const struct tgsi_full_src_register cond1_src_x =
9107 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9108
9109 unsigned tmp2 = get_temp_index(emit);
9110 const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9111 const struct tgsi_full_dst_register cond2_dst_x =
9112 writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9113 const struct tgsi_full_src_register cond2_src_x =
9114 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9115
9116 /**
9117 * In SM5, when width = 32 and offset = 0, it returns 0.
9118 * GLSL, on the other hand, expects the insert value to be copied unchanged to dst.
9119 */
9120
9121 /* cond1 = width == 32 */
9122 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9123 &cond1_dst_x, &inst->Src[3], &imm32);
9124
9125 /* cond2 = offset == 0 */
9126 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9127 &cond2_dst_x, &inst->Src[2], &zero);
9128
9129 /* cond2 = cond1 & cond2 */
9130 emit_instruction_op2(emit, VGPU10_OPCODE_AND,
9131 &cond2_dst_x, &cond2_src_x, &cond1_src_x);
9132
9133 /* if */
9134 emit_if(emit, &cond2_src_x);
9135
9136 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9137 &inst->Src[1]);
9138
9139 /* else */
9140 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9141
9142 /* BFI dst, width, offset, insert, base */
9143 begin_emit_instruction(emit);
9144 emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9145 emit_dst_register(emit, &inst->Dst[0]);
9146 emit_src_register(emit, &inst->Src[3]);
9147 emit_src_register(emit, &inst->Src[2]);
9148 emit_src_register(emit, &inst->Src[1]);
9149 emit_src_register(emit, &inst->Src[0]);
9150 end_emit_instruction(emit);
9151
9152 /* endif */
9153 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9154
9155 free_temp_indexes(emit);
9156 return TRUE;
9157 }
9158
9159
9160 /**
9161 * We only special case the MOV instruction to try to detect constant
9162 * color writes in the fragment shader.
9163 */
9164 static boolean
9165 emit_mov(struct svga_shader_emitter_v10 *emit,
9166 const struct tgsi_full_instruction *inst)
9167 {
9168 const struct tgsi_full_src_register *src = &inst->Src[0];
9169 const struct tgsi_full_dst_register *dst = &inst->Dst[0];
9170
9171 if (emit->unit == PIPE_SHADER_FRAGMENT &&
9172 dst->Register.File == TGSI_FILE_OUTPUT &&
9173 dst->Register.Index == 0 &&
9174 src->Register.File == TGSI_FILE_CONSTANT &&
9175 !src->Register.Indirect) {
9176 emit->constant_color_output = TRUE;
9177 }
9178
9179 return emit_simple(emit, inst);
9180 }
9181
9182
9183 /**
9184 * Emit a simple VGPU10 instruction which writes to multiple dest registers,
9185 * where TGSI only uses one dest register.
9186 */
9187 static boolean
9188 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
9189 const struct tgsi_full_instruction *inst,
9190 unsigned dst_count,
9191 unsigned dst_index)
9192 {
9193 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9194 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9195 unsigned i;
9196
9197 begin_emit_instruction(emit);
9198 emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9199
9200 for (i = 0; i < dst_count; i++) {
9201 if (i == dst_index) {
9202 emit_dst_register(emit, &inst->Dst[0]);
9203 } else {
9204 emit_null_dst_register(emit);
9205 }
9206 }
9207
9208 for (i = 0; i < op->num_src; i++) {
9209 emit_src_register(emit, &inst->Src[i]);
9210 }
9211 end_emit_instruction(emit);
9212
9213 return TRUE;
9214 }
9215
9216
9217 /**
9218 * Emit a VMware-specific VGPU10 instruction.
9219 */
9220 static boolean
9221 emit_vmware(struct svga_shader_emitter_v10 *emit,
9222 const struct tgsi_full_instruction *inst,
9223 VGPU10_VMWARE_OPCODE_TYPE subopcode)
9224 {
9225 VGPU10OpcodeToken0 token0;
9226 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9227 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9228 const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9229 const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9230
9231 unsigned i;
9232
9233 begin_emit_instruction(emit);
9234
9235 assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);
9236
9237 token0.value = 0;
9238 token0.opcodeType = VGPU10_OPCODE_VMWARE;
9239 token0.vmwareOpcodeType = subopcode;
9240 emit_dword(emit, token0.value);
9241
9242 if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
9243 /* IDIV only uses the first dest register. */
9244 emit_dst_register(emit, &inst->Dst[0]);
9245 emit_null_dst_register(emit);
9246 } else {
9247 for (i = 0; i < op->num_dst; i++) {
9248 if (dbl_dst) {
9249 check_double_dst_writemask(inst);
9250 }
9251 emit_dst_register(emit, &inst->Dst[i]);
9252 }
9253 }
9254
9255 for (i = 0; i < op->num_src; i++) {
9256 if (dbl_src) {
9257 check_double_src_swizzle(&inst->Src[i]);
9258 }
9259 emit_src_register(emit, &inst->Src[i]);
9260 }
9261 end_emit_instruction(emit);
9262
9263 return TRUE;
9264 }
9265
9266
9267 /**
9268 * Translate a single TGSI instruction to VGPU10.
9269 */
9270 static boolean
9271 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
9272 unsigned inst_number,
9273 const struct tgsi_full_instruction *inst)
9274 {
9275 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9276
9277 if (emit->skip_instruction)
9278 return TRUE;
9279
9280 switch (opcode) {
9281 case TGSI_OPCODE_ADD:
9282 case TGSI_OPCODE_AND:
9283 case TGSI_OPCODE_BGNLOOP:
9284 case TGSI_OPCODE_BRK:
9285 case TGSI_OPCODE_CEIL:
9286 case TGSI_OPCODE_CONT:
9287 case TGSI_OPCODE_DDX:
9288 case TGSI_OPCODE_DDY:
9289 case TGSI_OPCODE_DIV:
9290 case TGSI_OPCODE_DP2:
9291 case TGSI_OPCODE_DP3:
9292 case TGSI_OPCODE_DP4:
9293 case TGSI_OPCODE_ELSE:
9294 case TGSI_OPCODE_ENDIF:
9295 case TGSI_OPCODE_ENDLOOP:
9296 case TGSI_OPCODE_ENDSUB:
9297 case TGSI_OPCODE_F2I:
9298 case TGSI_OPCODE_F2U:
9299 case TGSI_OPCODE_FLR:
9300 case TGSI_OPCODE_FRC:
9301 case TGSI_OPCODE_FSEQ:
9302 case TGSI_OPCODE_FSGE:
9303 case TGSI_OPCODE_FSLT:
9304 case TGSI_OPCODE_FSNE:
9305 case TGSI_OPCODE_I2F:
9306 case TGSI_OPCODE_IMAX:
9307 case TGSI_OPCODE_IMIN:
9308 case TGSI_OPCODE_INEG:
9309 case TGSI_OPCODE_ISGE:
9310 case TGSI_OPCODE_ISHR:
9311 case TGSI_OPCODE_ISLT:
9312 case TGSI_OPCODE_MAD:
9313 case TGSI_OPCODE_MAX:
9314 case TGSI_OPCODE_MIN:
9315 case TGSI_OPCODE_MUL:
9316 case TGSI_OPCODE_NOP:
9317 case TGSI_OPCODE_NOT:
9318 case TGSI_OPCODE_OR:
9319 case TGSI_OPCODE_UADD:
9320 case TGSI_OPCODE_USEQ:
9321 case TGSI_OPCODE_USGE:
9322 case TGSI_OPCODE_USLT:
9323 case TGSI_OPCODE_UMIN:
9324 case TGSI_OPCODE_UMAD:
9325 case TGSI_OPCODE_UMAX:
9326 case TGSI_OPCODE_ROUND:
9327 case TGSI_OPCODE_SQRT:
9328 case TGSI_OPCODE_SHL:
9329 case TGSI_OPCODE_TRUNC:
9330 case TGSI_OPCODE_U2F:
9331 case TGSI_OPCODE_UCMP:
9332 case TGSI_OPCODE_USHR:
9333 case TGSI_OPCODE_USNE:
9334 case TGSI_OPCODE_XOR:
9335 /* Begin SM5 opcodes */
9336 case TGSI_OPCODE_F2D:
9337 case TGSI_OPCODE_D2F:
9338 case TGSI_OPCODE_DADD:
9339 case TGSI_OPCODE_DMUL:
9340 case TGSI_OPCODE_DMAX:
9341 case TGSI_OPCODE_DMIN:
9342 case TGSI_OPCODE_DSGE:
9343 case TGSI_OPCODE_DSLT:
9344 case TGSI_OPCODE_DSEQ:
9345 case TGSI_OPCODE_DSNE:
9346 case TGSI_OPCODE_BREV:
9347 case TGSI_OPCODE_POPC:
9348 case TGSI_OPCODE_LSB:
9349 case TGSI_OPCODE_INTERP_CENTROID:
9350 case TGSI_OPCODE_INTERP_SAMPLE:
9351 /* simple instructions */
9352 return emit_simple(emit, inst);
9353 case TGSI_OPCODE_RET:
9354 if (emit->unit == PIPE_SHADER_TESS_CTRL &&
9355 !emit->tcs.control_point_phase) {
9356
9357 /* store the tessellation levels in the patch constant phase only */
9358 store_tesslevels(emit);
9359 }
9360 return emit_simple(emit, inst);
9361
9362 case TGSI_OPCODE_IMSB:
9363 case TGSI_OPCODE_UMSB:
9364 return emit_msb(emit, inst);
9365 case TGSI_OPCODE_IBFE:
9366 case TGSI_OPCODE_UBFE:
9367 return emit_bfe(emit, inst);
9368 case TGSI_OPCODE_BFI:
9369 return emit_bfi(emit, inst);
9370 case TGSI_OPCODE_MOV:
9371 return emit_mov(emit, inst);
9372 case TGSI_OPCODE_EMIT:
9373 return emit_vertex(emit, inst);
9374 case TGSI_OPCODE_ENDPRIM:
9375 return emit_endprim(emit, inst);
9376 case TGSI_OPCODE_IABS:
9377 return emit_iabs(emit, inst);
9378 case TGSI_OPCODE_ARL:
9379 FALLTHROUGH;
9380 case TGSI_OPCODE_UARL:
9381 return emit_arl_uarl(emit, inst);
9382 case TGSI_OPCODE_BGNSUB:
9383 /* no-op */
9384 return TRUE;
9385 case TGSI_OPCODE_CAL:
9386 return emit_cal(emit, inst);
9387 case TGSI_OPCODE_CMP:
9388 return emit_cmp(emit, inst);
9389 case TGSI_OPCODE_COS:
9390 return emit_sincos(emit, inst);
9391 case TGSI_OPCODE_DST:
9392 return emit_dst(emit, inst);
9393 case TGSI_OPCODE_EX2:
9394 return emit_ex2(emit, inst);
9395 case TGSI_OPCODE_EXP:
9396 return emit_exp(emit, inst);
9397 case TGSI_OPCODE_IF:
9398 return emit_if(emit, &inst->Src[0]);
9399 case TGSI_OPCODE_KILL:
9400 return emit_kill(emit, inst);
9401 case TGSI_OPCODE_KILL_IF:
9402 return emit_kill_if(emit, inst);
9403 case TGSI_OPCODE_LG2:
9404 return emit_lg2(emit, inst);
9405 case TGSI_OPCODE_LIT:
9406 return emit_lit(emit, inst);
9407 case TGSI_OPCODE_LODQ:
9408 return emit_lodq(emit, inst);
9409 case TGSI_OPCODE_LOG:
9410 return emit_log(emit, inst);
9411 case TGSI_OPCODE_LRP:
9412 return emit_lrp(emit, inst);
9413 case TGSI_OPCODE_POW:
9414 return emit_pow(emit, inst);
9415 case TGSI_OPCODE_RCP:
9416 return emit_rcp(emit, inst);
9417 case TGSI_OPCODE_RSQ:
9418 return emit_rsq(emit, inst);
9419 case TGSI_OPCODE_SAMPLE:
9420 return emit_sample(emit, inst);
9421 case TGSI_OPCODE_SEQ:
9422 return emit_seq(emit, inst);
9423 case TGSI_OPCODE_SGE:
9424 return emit_sge(emit, inst);
9425 case TGSI_OPCODE_SGT:
9426 return emit_sgt(emit, inst);
9427 case TGSI_OPCODE_SIN:
9428 return emit_sincos(emit, inst);
9429 case TGSI_OPCODE_SLE:
9430 return emit_sle(emit, inst);
9431 case TGSI_OPCODE_SLT:
9432 return emit_slt(emit, inst);
9433 case TGSI_OPCODE_SNE:
9434 return emit_sne(emit, inst);
9435 case TGSI_OPCODE_SSG:
9436 return emit_ssg(emit, inst);
9437 case TGSI_OPCODE_ISSG:
9438 return emit_issg(emit, inst);
9439 case TGSI_OPCODE_TEX:
9440 return emit_tex(emit, inst);
9441 case TGSI_OPCODE_TG4:
9442 return emit_tg4(emit, inst);
9443 case TGSI_OPCODE_TEX2:
9444 return emit_tex2(emit, inst);
9445 case TGSI_OPCODE_TXP:
9446 return emit_txp(emit, inst);
9447 case TGSI_OPCODE_TXB:
9448 case TGSI_OPCODE_TXB2:
9449 case TGSI_OPCODE_TXL:
9450 return emit_txl_txb(emit, inst);
9451 case TGSI_OPCODE_TXD:
9452 return emit_txd(emit, inst);
9453 case TGSI_OPCODE_TXF:
9454 return emit_txf(emit, inst);
9455 case TGSI_OPCODE_TXL2:
9456 return emit_txl2(emit, inst);
9457 case TGSI_OPCODE_TXQ:
9458 return emit_txq(emit, inst);
9459 case TGSI_OPCODE_UIF:
9460 return emit_if(emit, &inst->Src[0]);
9461 case TGSI_OPCODE_UMUL_HI:
9462 case TGSI_OPCODE_IMUL_HI:
9463 case TGSI_OPCODE_UDIV:
9464 /* These cases use only the FIRST of two destination registers */
9465 return emit_simple_1dst(emit, inst, 2, 0);
9466 case TGSI_OPCODE_IDIV:
9467 return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
9468 case TGSI_OPCODE_UMUL:
9469 case TGSI_OPCODE_UMOD:
9470 case TGSI_OPCODE_MOD:
9471 /* These cases use only the SECOND of two destination registers */
9472 return emit_simple_1dst(emit, inst, 2, 1);
9473
9474 /* Begin SM5 opcodes */
9475 case TGSI_OPCODE_DABS:
9476 return emit_dabs(emit, inst);
9477 case TGSI_OPCODE_DNEG:
9478 return emit_dneg(emit, inst);
9479 case TGSI_OPCODE_DRCP:
9480 return emit_simple(emit, inst);
9481 case TGSI_OPCODE_DSQRT:
9482 return emit_dsqrt(emit, inst);
9483 case TGSI_OPCODE_DMAD:
9484 return emit_dmad(emit, inst);
9485 case TGSI_OPCODE_DFRAC:
9486 return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
9487 case TGSI_OPCODE_D2I:
9488 case TGSI_OPCODE_D2U:
9489 return emit_simple(emit, inst);
9490 case TGSI_OPCODE_I2D:
9491 case TGSI_OPCODE_U2D:
9492 return emit_simple(emit, inst);
9493 case TGSI_OPCODE_DRSQ:
9494 return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
9495 case TGSI_OPCODE_DDIV:
9496 return emit_simple(emit, inst);
9497 case TGSI_OPCODE_INTERP_OFFSET:
9498 return emit_interp_offset(emit, inst);
9499
9500 /* The following opcodes should never be seen here. We return zero
9501 * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
9502 * FMA_SUPPORTED, LDEXP_SUPPORTED queries.
9503 */
9504 case TGSI_OPCODE_FMA:
9505 case TGSI_OPCODE_LDEXP:
9506 case TGSI_OPCODE_DSSG:
9507 case TGSI_OPCODE_DFRACEXP:
9508 case TGSI_OPCODE_DLDEXP:
9509 case TGSI_OPCODE_DTRUNC:
9510 case TGSI_OPCODE_DCEIL:
9511 case TGSI_OPCODE_DFLR:
9512 debug_printf("Unexpected TGSI opcode %s. "
9513 "Should have been translated away by the GLSL compiler.\n",
9514 tgsi_get_opcode_name(opcode));
9515 return FALSE;
9516
9517 case TGSI_OPCODE_LOAD:
9518 case TGSI_OPCODE_STORE:
9519 case TGSI_OPCODE_ATOMAND:
9520 case TGSI_OPCODE_ATOMCAS:
9521 case TGSI_OPCODE_ATOMIMAX:
9522 case TGSI_OPCODE_ATOMIMIN:
9523 case TGSI_OPCODE_ATOMOR:
9524 case TGSI_OPCODE_ATOMUADD:
9525 case TGSI_OPCODE_ATOMUMAX:
9526 case TGSI_OPCODE_ATOMUMIN:
9527 case TGSI_OPCODE_ATOMXCHG:
9528 case TGSI_OPCODE_ATOMXOR:
9529 return FALSE;
9530 case TGSI_OPCODE_BARRIER:
9531 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
9532 /* The SM5 device doesn't support BARRIER in the tessellation control
9533 * shader. If the shader uses a barrier, ignore this opcode and
9534 * continue with the rest of the translation.
9535 */
9536 pipe_debug_message(&emit->svga_debug_callback, INFO,
9537 "barrier instruction is not supported in tessellation control shader\n");
9538 return TRUE;
9539 }
9540 else {
9541 return emit_simple(emit, inst);
9542 }
9543
9544 case TGSI_OPCODE_END:
9545 if (!emit_post_helpers(emit))
9546 return FALSE;
9547 return emit_simple(emit, inst);
9548
9549 default:
9550 debug_printf("Unimplemented tgsi instruction %s\n",
9551 tgsi_get_opcode_name(opcode));
9552 return FALSE;
9553 }
9554
9555 return TRUE;
9556 }
9557
9558
9559 /**
9560 * Emit the extra instructions to adjust the vertex position.
9561 * There are two possible adjustments:
9562 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
9563 * "prescale" and "pretranslate" values.
9564 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
9565 * The temporary register holding the vertex position is emit->vposition.tmp_index.
9566 */
9567 static void
9568 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
9569 {
9570 struct tgsi_full_src_register tmp_pos_src;
9571 struct tgsi_full_dst_register pos_dst;
9572 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
9573
9574 /* Don't bother to emit any extra vertex instructions if vertex position is
9575 * not written out
9576 */
9577 if (emit->vposition.out_index == INVALID_INDEX)
9578 return;
9579
9580 /**
9581 * Reset the temporary vertex position register index
9582 * so that emit_dst_register() will use the real vertex position output
9583 */
9584 emit->vposition.tmp_index = INVALID_INDEX;
9585
9586 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
9587 pos_dst = make_dst_output_reg(emit->vposition.out_index);
9588
9589 /* If non-adjusted vertex position register index
9590 * is valid, copy the vertex position from the temporary
9591 * vertex position register before it is modified by the
9592 * prescale computation.
9593 */
9594 if (emit->vposition.so_index != INVALID_INDEX) {
9595 struct tgsi_full_dst_register pos_so_dst =
9596 make_dst_output_reg(emit->vposition.so_index);
9597
9598 /* MOV pos_so, tmp_pos */
9599 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
9600 }
9601
9602 if (emit->vposition.need_prescale) {
9603 /* This code adjusts the vertex position to match the VGPU10 convention.
9604 * If p is the position computed by the shader (usually by applying the
9605 * modelview and projection matrices), the new position q is computed by:
9606 *
9607 * q.x = p.w * trans.x + p.x * scale.x
9608 * q.y = p.w * trans.y + p.y * scale.y
9609 * q.z = p.w * trans.z + p.z * scale.z;
9610 * q.w = p.w * trans.w + p.w;
9611 */
9612 struct tgsi_full_src_register tmp_pos_src_w =
9613 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9614 struct tgsi_full_dst_register tmp_pos_dst =
9615 make_dst_temp_reg(vs_pos_tmp_index);
9616 struct tgsi_full_dst_register tmp_pos_dst_xyz =
9617 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
9618
9619 struct tgsi_full_src_register prescale_scale =
9620 make_src_temp_reg(emit->vposition.prescale_scale_index);
9621 struct tgsi_full_src_register prescale_trans =
9622 make_src_temp_reg(emit->vposition.prescale_trans_index);
9623
9624 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
9625 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
9626 &tmp_pos_src, &prescale_scale);
9627
9628 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
9629 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
9630 &prescale_trans, &tmp_pos_src);
9631 }
9632 else if (emit->key.vs.undo_viewport) {
9633 /* This code computes the final vertex position from the temporary
9634 * vertex position by undoing the viewport transformation and the
9635 * divide-by-W operation (we convert window coords back to clip coords).
9636 * This is needed when we use the 'draw' module for fallbacks.
9637 * If p is the temp pos in window coords, then the NDC coord q is:
9638 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
9639 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
9640 * q.z = p.z * p.w
9641 * q.w = p.w
9642 * CONST[vs_viewport_index] contains:
9643 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
9644 */
9645 struct tgsi_full_dst_register tmp_pos_dst =
9646 make_dst_temp_reg(vs_pos_tmp_index);
9647 struct tgsi_full_dst_register tmp_pos_dst_xy =
9648 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
9649 struct tgsi_full_src_register tmp_pos_src_wwww =
9650 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9651
9652 struct tgsi_full_dst_register pos_dst_xyz =
9653 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
9654 struct tgsi_full_dst_register pos_dst_w =
9655 writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
9656
9657 struct tgsi_full_src_register vp_xyzw =
9658 make_src_const_reg(emit->vs.viewport_index);
9659 struct tgsi_full_src_register vp_zwww =
9660 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
9661 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
9662
9663 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
9664 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
9665 &tmp_pos_src, &vp_zwww);
9666
9667 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
9668 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
9669 &tmp_pos_src, &vp_xyzw);
9670
9671 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
9672 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
9673 &tmp_pos_src, &tmp_pos_src_wwww);
9674
9675 /* MOV pos.w, tmp_pos.w */
9676 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
9677 }
9678 else if (vs_pos_tmp_index != INVALID_INDEX) {
9679 /* This code is to handle the case where the temporary vertex
9680 * position register is created when the vertex shader has stream
9681 * output and prescale is disabled because rasterization is to be
9682 * discarded.
9683 */
9684 struct tgsi_full_dst_register pos_dst =
9685 make_dst_output_reg(emit->vposition.out_index);
9686
9687 /* MOV pos, tmp_pos */
9688 begin_emit_instruction(emit);
9689 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9690 emit_dst_register(emit, &pos_dst);
9691 emit_src_register(emit, &tmp_pos_src);
9692 end_emit_instruction(emit);
9693 }
9694
9695 /* Restore original vposition.tmp_index value for the next GS vertex.
9696 * It doesn't matter for VS.
9697 */
9698 emit->vposition.tmp_index = vs_pos_tmp_index;
9699 }
9700
9701 static void
9702 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
9703 {
9704 if (emit->clip_mode == CLIP_DISTANCE) {
9705 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
9706 emit_clip_distance_instructions(emit);
9707
9708 } else if (emit->clip_mode == CLIP_VERTEX &&
9709 emit->key.last_vertex_stage) {
9710 /* Convert TGSI CLIPVERTEX to CLIPDIST */
9711 emit_clip_vertex_instructions(emit);
9712 }
9713
9714 /**
9715 * Emit vertex position and take care of legacy user planes only if
9716 * there is a valid vertex position register index.
9717 * This handles the case
9718 * where the shader doesn't output a vertex position; in
9719 * that case, don't bother emitting more vertex instructions.
9720 */
9721 if (emit->vposition.out_index == INVALID_INDEX)
9722 return;
9723
9724 /**
9725 * Emit per-vertex clipping instructions for legacy user defined clip planes.
9726 * NOTE: we must emit the clip distance instructions before the
9727 * emit_vpos_instructions() call since the later function will change
9728 * the TEMP[vs_pos_tmp_index] value.
9729 */
9730 if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
9731 /* Emit CLIPDIST for legacy user defined clip planes */
9732 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
9733 }
9734 }
9735
9736
9737 /**
9738 * Emit extra per-vertex instructions. This includes clip-coordinate
9739 * space conversion and computing clip distances. This is called for
9740 * each GS emit-vertex instruction and at the end of VS translation.
9741 */
9742 static void
9743 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
9744 {
9745 /* Emit clipping instructions based on clipping mode */
9746 emit_clipping_instructions(emit);
9747
9748 /* Emit vertex position instructions */
9749 emit_vpos_instructions(emit);
9750 }
9751
9752
9753 /**
9754 * Translate the TGSI_OPCODE_EMIT GS instruction.
9755 */
9756 static boolean
9757 emit_vertex(struct svga_shader_emitter_v10 *emit,
9758 const struct tgsi_full_instruction *inst)
9759 {
9760 boolean ret = TRUE;
9761
9762 assert(emit->unit == PIPE_SHADER_GEOMETRY);
9763
9764 /**
9765 * Emit the viewport array index for the first vertex.
9766 */
9767 if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
9768 struct tgsi_full_dst_register viewport_index_out =
9769 make_dst_output_reg(emit->gs.viewport_index_out_index);
9770 struct tgsi_full_dst_register viewport_index_out_x =
9771 writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
9772 struct tgsi_full_src_register viewport_index_tmp =
9773 make_src_temp_reg(emit->gs.viewport_index_tmp_index);
9774
9775 /* Set the out index to INVALID_INDEX, so it will not
9776 * be assigned to a temp again in emit_dst_register, and
9777 * the viewport index will not be assigned again in the
9778 * subsequent vertices.
9779 */
9780 emit->gs.viewport_index_out_index = INVALID_INDEX;
9781 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9782 &viewport_index_out_x, &viewport_index_tmp);
9783 }
9784
9785 /**
9786 * Find the stream index associated with this emit vertex instruction.
9787 */
9788 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
9789 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
9790
9791 /**
9792 * According to the ARB_gpu_shader5 spec, the built-in geometry shader
9793 * outputs are always associated with vertex stream zero.
9794 * So emit the extra vertex instructions for position or clip distance
9795 * for stream zero only.
9796 */
9797 if (streamIndex == 0) {
9798 /**
9799 * Before emitting vertex instructions, emit the temporaries for
9800 * the prescale constants based on the viewport index if needed.
9801 */
9802 if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
9803 emit_temp_prescale_instructions(emit);
9804
9805 emit_vertex_instructions(emit);
9806 }
9807
9808 begin_emit_instruction(emit);
9809 if (emit->version >= 50) {
9810 if (emit->info.num_stream_output_components[streamIndex] == 0) {
9811 /**
9812 * If there is no output for this stream, discard this instruction.
9813 */
9814 emit->discard_instruction = TRUE;
9815 }
9816 else {
9817 emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE);
9818 emit_stream_register(emit, streamIndex);
9819 }
9820 }
9821 else {
9822 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
9823 }
9824 end_emit_instruction(emit);
9825
9826 return ret;
9827 }
9828
9829
9830 /**
9831 * Emit the extra code to convert from VGPU10's boolean front-face
9832 * register to TGSI's signed front-face register.
9833 *
9834 * TODO: Make temporary front-face register a scalar.
9835 */
9836 static void
9837 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
9838 {
9839 assert(emit->unit == PIPE_SHADER_FRAGMENT);
9840
9841 if (emit->fs.face_input_index != INVALID_INDEX) {
9842 /* convert vgpu10 boolean face register to gallium +/-1 value */
9843 struct tgsi_full_dst_register tmp_dst =
9844 make_dst_temp_reg(emit->fs.face_tmp_index);
9845 struct tgsi_full_src_register one =
9846 make_immediate_reg_float(emit, 1.0f);
9847 struct tgsi_full_src_register neg_one =
9848 make_immediate_reg_float(emit, -1.0f);
9849
9850 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
9851 begin_emit_instruction(emit);
9852 emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
9853 emit_dst_register(emit, &tmp_dst);
9854 emit_face_register(emit);
9855 emit_src_register(emit, &one);
9856 emit_src_register(emit, &neg_one);
9857 end_emit_instruction(emit);
9858 }
9859 }
9860
9861
9862 /**
9863 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
9864 */
9865 static void
9866 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
9867 {
9868 assert(emit->unit == PIPE_SHADER_FRAGMENT);
9869
9870 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
9871 struct tgsi_full_dst_register tmp_dst =
9872 make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
9873 struct tgsi_full_dst_register tmp_dst_xyz =
9874 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
9875 struct tgsi_full_dst_register tmp_dst_w =
9876 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
9877 struct tgsi_full_src_register one =
9878 make_immediate_reg_float(emit, 1.0f);
9879 struct tgsi_full_src_register fragcoord =
9880 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
9881
9882 /* save the input index */
9883 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
9884 /* set to invalid to prevent substitution in emit_src_register() */
9885 emit->fs.fragcoord_input_index = INVALID_INDEX;
9886
9887 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
9888 begin_emit_instruction(emit);
9889 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9890 emit_dst_register(emit, &tmp_dst_xyz);
9891 emit_src_register(emit, &fragcoord);
9892 end_emit_instruction(emit);
9893
9894 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
9895 begin_emit_instruction(emit);
9896 emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
9897 emit_dst_register(emit, &tmp_dst_w);
9898 emit_src_register(emit, &one);
9899 emit_src_register(emit, &fragcoord);
9900 end_emit_instruction(emit);
9901
9902 /* restore saved value */
9903 emit->fs.fragcoord_input_index = fragcoord_input_index;
9904 }
9905 }
9906
9907
9908 /**
9909 * Emit the extra code to get the current sample position value and
9910 * put it into a temp register.
9911 */
9912 static void
9913 emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
9914 {
9915 assert(emit->unit == PIPE_SHADER_FRAGMENT);
9916
9917 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
9918 assert(emit->version >= 41);
9919
9920 struct tgsi_full_dst_register tmp_dst =
9921 make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
9922 struct tgsi_full_src_register half =
9923 make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
9924
9925 struct tgsi_full_src_register tmp_src =
9926 make_src_temp_reg(emit->fs.sample_pos_tmp_index);
9927 struct tgsi_full_src_register sample_index_reg =
9928 make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
9929 emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
9930
9931 /* The first src register is a shader resource (if we want a
9932 * multisampled resource sample position) or the rasterizer register
9933 * (if we want the current sample position in the color buffer). We
9934 * want the latter.
9935 */
9936
9937 /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
9938 begin_emit_instruction(emit);
9939 emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE);
9940 emit_dst_register(emit, &tmp_dst);
9941 emit_rasterizer_register(emit);
9942 emit_src_register(emit, &sample_index_reg);
9943 end_emit_instruction(emit);
9944
9945 /* Convert from D3D coords to GL coords by adding 0.5 bias */
9946 /* ADD dst, dst, half */
9947 begin_emit_instruction(emit);
9948 emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE);
9949 emit_dst_register(emit, &tmp_dst);
9950 emit_src_register(emit, &tmp_src);
9951 emit_src_register(emit, &half);
9952 end_emit_instruction(emit);
9953 }
9954 }
9955
9956
9957 /**
9958 * Emit extra instructions to adjust VS inputs/attributes. This can
9959 * mean casting a vertex attribute from int to float or setting the
9960 * W component to 1, or both.
9961 */
9962 static void
9963 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
9964 {
9965 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
9966 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
9967 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
9968 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
9969 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
9970 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
9971 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
9972
9973 unsigned adjust_mask = (save_w_1_mask |
9974 save_itof_mask |
9975 save_utof_mask |
9976 save_is_bgra_mask |
9977 save_puint_to_snorm_mask |
9978 save_puint_to_uscaled_mask |
9979 save_puint_to_sscaled_mask);
9980
9981 assert(emit->unit == PIPE_SHADER_VERTEX);
9982
9983 if (adjust_mask) {
9984 struct tgsi_full_src_register one =
9985 make_immediate_reg_float(emit, 1.0f);
9986
9987 struct tgsi_full_src_register one_int =
9988 make_immediate_reg_int(emit, 1);
9989
9990 /* We need to turn off these bitmasks while emitting the
9991 * instructions below, then restore them afterward.
9992 */
9993 emit->key.vs.adjust_attrib_w_1 = 0;
9994 emit->key.vs.adjust_attrib_itof = 0;
9995 emit->key.vs.adjust_attrib_utof = 0;
9996 emit->key.vs.attrib_is_bgra = 0;
9997 emit->key.vs.attrib_puint_to_snorm = 0;
9998 emit->key.vs.attrib_puint_to_uscaled = 0;
9999 emit->key.vs.attrib_puint_to_sscaled = 0;
10000
10001 while (adjust_mask) {
10002 unsigned index = u_bit_scan(&adjust_mask);
10003
10004 /* skip the instruction if this vertex attribute is not being used */
10005 if (emit->info.input_usage_mask[index] == 0)
10006 continue;
10007
10008 unsigned tmp = emit->vs.adjusted_input[index];
10009 struct tgsi_full_src_register input_src =
10010 make_src_reg(TGSI_FILE_INPUT, index);
10011
10012 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10013 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10014 struct tgsi_full_dst_register tmp_dst_w =
10015 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
10016
10017 /* ITOF/UTOF/MOV tmp, input[index] */
10018 if (save_itof_mask & (1 << index)) {
10019 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
10020 &tmp_dst, &input_src);
10021 }
10022 else if (save_utof_mask & (1 << index)) {
10023 emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
10024 &tmp_dst, &input_src);
10025 }
10026 else if (save_puint_to_snorm_mask & (1 << index)) {
10027 emit_puint_to_snorm(emit, &tmp_dst, &input_src);
10028 }
10029 else if (save_puint_to_uscaled_mask & (1 << index)) {
10030 emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
10031 }
10032 else if (save_puint_to_sscaled_mask & (1 << index)) {
10033 emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
10034 }
10035 else {
10036 assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
10037 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
10038 &tmp_dst, &input_src);
10039 }
10040
10041 if (save_is_bgra_mask & (1 << index)) {
10042 emit_swap_r_b(emit, &tmp_dst, &tmp_src);
10043 }
10044
10045 if (save_w_1_mask & (1 << index)) {
10046 /* MOV tmp.w, 1.0 */
10047 if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
10048 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
10049 &tmp_dst_w, &one_int);
10050 }
10051 else {
10052 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
10053 &tmp_dst_w, &one);
10054 }
10055 }
10056 }
10057
10058 emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
10059 emit->key.vs.adjust_attrib_itof = save_itof_mask;
10060 emit->key.vs.adjust_attrib_utof = save_utof_mask;
10061 emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
10062 emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
10063 emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
10064 emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
10065 }
10066 }
10067
10068
10069 /* Find zero-value immediate for default layer index */
10070 static void
10071 emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
10072 {
10073 assert(emit->unit == PIPE_SHADER_FRAGMENT);
10074
10075 /* immediate for default layer index 0 */
10076 if (emit->fs.layer_input_index != INVALID_INDEX) {
10077 union tgsi_immediate_data imm;
10078 imm.Int = 0;
10079 emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
10080 }
10081 }
10082
10083
10084 static void
10085 emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10086 unsigned cbuf_index,
10087 struct tgsi_full_dst_register *scale,
10088 struct tgsi_full_dst_register *translate)
10089 {
10090 struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
10091 struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
10092
10093 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
10094 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
10095 }
10096
10097
10098 /**
10099 * A recursive helper function to find the prescale from the constant buffer
10100 */
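/*
 * For example (sketch), with num_prescale == 3 the recursion below emits a
 * flat if/else ladder along these lines:
 *
 *   GE  tmp, vp_index, 0
 *   IF  tmp
 *     <load prescale[0] scale/translate>
 *   ELSE
 *     EQ  tmp, vp_index, 1
 *     IF  tmp
 *       <load prescale[1] scale/translate>
 *     ELSE
 *       EQ  tmp, vp_index, 2
 *       IF  tmp
 *         <load prescale[2] scale/translate>
 *       ENDIF
 *     ENDIF
 *   ENDIF
 */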
10101 static void
10102 find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10103 unsigned index, unsigned num_prescale,
10104 struct tgsi_full_src_register *vp_index,
10105 struct tgsi_full_dst_register *scale,
10106 struct tgsi_full_dst_register *translate,
10107 struct tgsi_full_src_register *tmp_src,
10108 struct tgsi_full_dst_register *tmp_dst)
10109 {
10110 if (num_prescale == 0)
10111 return;
10112
10113 if (index > 0) {
10114 /* ELSE */
10115 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
10116 }
10117
10118 struct tgsi_full_src_register index_src =
10119 make_immediate_reg_int(emit, index);
10120
10121 if (index == 0) {
10122 /* GE tmp, vp_index, index */
10123 emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
10124 vp_index, &index_src);
10125 } else {
10126 /* EQ tmp, vp_index, index */
10127 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
10128 vp_index, &index_src);
10129 }
10130
10131 /* IF tmp */
10132 emit_if(emit, tmp_src);
10133 emit_temp_prescale_from_cbuf(emit,
10134 emit->vposition.prescale_cbuf_index + 2 * index,
10135 scale, translate);
10136
10137 find_prescale_from_cbuf(emit, index+1, num_prescale-1,
10138 vp_index, scale, translate,
10139 tmp_src, tmp_dst);
10140
10141 /* ENDIF */
10142 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
10143 }
10144
10145
10146 /**
10147 * This helper function emits instructions to set the prescale
10148 * and translate temporaries to the correct constants from the
10149 * constant buffer according to the designated viewport.
10150 */
10151 static void
10152 emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
10153 {
10154 struct tgsi_full_dst_register prescale_scale =
10155 make_dst_temp_reg(emit->vposition.prescale_scale_index);
10156 struct tgsi_full_dst_register prescale_translate =
10157 make_dst_temp_reg(emit->vposition.prescale_trans_index);
10158
10159 unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
10160
10161 if (emit->vposition.num_prescale == 1) {
10162 emit_temp_prescale_from_cbuf(emit,
10163 prescale_cbuf_index,
10164 &prescale_scale, &prescale_translate);
10165 } else {
10166 /**
10167 * Since the SM5 device does not support dynamic indexing, we need
10168 * an if-else ladder to find the prescale constants for the
10169 * specified viewport.
10170 */
10171 struct tgsi_full_src_register vp_index_src =
10172 make_src_temp_reg(emit->gs.viewport_index_tmp_index);
10173
10174 struct tgsi_full_src_register vp_index_src_x =
10175 scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
10176
10177 unsigned tmp = get_temp_index(emit);
10178 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10179 struct tgsi_full_src_register tmp_src_x =
10180 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10181 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10182
10183 find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
10184 &vp_index_src_x,
10185 &prescale_scale, &prescale_translate,
10186 &tmp_src_x, &tmp_dst);
10187 }
10188
10189 /* Mark that the prescale temporaries have been emitted */
10190 emit->vposition.have_prescale = 1;
10191 }
10192
10193
10194 /**
10195 * A helper function to emit an instruction in a vertex shader to add a bias
10196 * to the VertexID system value. This patches the VertexID in the SVGA vertex
10197 * shader to include the base vertex of an indexed primitive or the start index
10198 * of a non-indexed primitive.
10199 */
10200 static void
10201 emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
10202 {
10203 struct tgsi_full_src_register vertex_id_bias_index =
10204 make_src_const_reg(emit->vs.vertex_id_bias_index);
10205 struct tgsi_full_src_register vertex_id_sys_src =
10206 make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
10207 struct tgsi_full_src_register vertex_id_sys_src_x =
10208 scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
10209 struct tgsi_full_dst_register vertex_id_tmp_dst =
10210 make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
10211
10212 /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
10213 unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
10214 emit->vs.vertex_id_tmp_index = INVALID_INDEX;
10215 emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
10216 &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE,
10217 FALSE);
10218 emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
10219 }
10220
10221 /**
10222 * A hull shader must have control point outputs, but a tessellation
10223 * control shader may return without writing to a control point output.
10224 * In that case, the control point output is assumed to be passed through
10225 * from the control point input.
10226 * This helper function writes out a control point output up front in case
10227 * the tessellation control shader returns before writing a
10228 * control point output.
10229 */
10230 static void
10231 emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
10232 {
10233 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10234 assert(emit->tcs.control_point_phase);
10235 assert(emit->tcs.control_point_input_index != INVALID_INDEX);
10236 assert(emit->tcs.control_point_out_index != INVALID_INDEX);
10237 assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
10238
10239 /* UARL ADDR[INDEX].x INVOCATION.xxxx */
10240
10241 struct tgsi_full_src_register invocation_src;
10242 struct tgsi_full_dst_register addr_dst;
10243 struct tgsi_full_dst_register addr_dst_x;
10244 unsigned addr_tmp;
10245
10246 addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
10247 addr_dst = make_dst_temp_reg(addr_tmp);
10248 addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
10249
10250 invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
10251 emit->tcs.invocation_id_sys_index);
10252
10253 begin_emit_instruction(emit);
10254 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10255 emit_dst_register(emit, &addr_dst_x);
10256 emit_src_register(emit, &invocation_src);
10257 end_emit_instruction(emit);
10258
10259
10260 /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
10261
10262 struct tgsi_full_src_register input_control_point;
10263 struct tgsi_full_dst_register output_control_point;
10264
10265 input_control_point = make_src_reg(TGSI_FILE_INPUT,
10266 emit->tcs.control_point_input_index);
10267 input_control_point.Register.Dimension = 1;
10268 input_control_point.Dimension.Indirect = 1;
10269 input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
10270 input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index;
10271 output_control_point =
10272 make_dst_output_reg(emit->tcs.control_point_out_index);
10273
10274 begin_emit_instruction(emit);
10275 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10276 emit_dst_register(emit, &output_control_point);
10277 emit_src_register(emit, &input_control_point);
10278 end_emit_instruction(emit);
10279 }
10280
10281 /**
10282 * This function constructs temporary tessfactors from the VGPU10*_TESSFACTOR
10283 * values in the domain shader. SM5 provides tessfactors as scalar floating
10284 * point values whereas TGSI emits them as vectors. This function builds temp
10285 * tessfactor vectors, analogous to TGSI_SEMANTIC_TESSINNER/OUTER, filled with
10286 * the values from VGPU10*_TESSFACTOR. These constructed vectors are used
10287 * wherever TGSI_SEMANTIC_TESSINNER/OUTER appears in the shader.
10288 */
10289 static void
10290 emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
10291 {
10292 struct tgsi_full_src_register src;
10293 struct tgsi_full_dst_register dst;
10294
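   /*
    * The FALLTHROUGH cases below fill the temp vectors one component at a
    * time: quads fill inner.xy and outer.xyzw, triangles fill inner.x and
    * outer.xyz, and isolines fill outer.xy only (inner is set to 1.0 since
    * SM5 has no inside tessfactor for isolines).
    */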
10295 if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
10296 dst = make_dst_temp_reg(emit->tes.inner.temp_index);
10297
10298 switch (emit->tes.prim_mode) {
10299 case PIPE_PRIM_QUADS:
10300 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10301 emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
10302 dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10303 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10304 FALLTHROUGH;
10305 case PIPE_PRIM_TRIANGLES:
10306 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10307 emit->tes.inner.in_index, TGSI_SWIZZLE_X);
10308 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10309 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10310 break;
10311 case PIPE_PRIM_LINES:
10312 /**
10313 * As per the SM5 spec, InsideTessFactor is unused for isolines.
10314 * The GLSL tessInnerLevel for isolines is meaningless, but if an
10315 * application tries to read tessInnerLevel in the TES when the primitive
10316 * type is isolines, return vec(1.0f) instead of letting the driver
10317 * crash on the access.
10318 */
10319 src = make_immediate_reg_float(emit, 1.0f);
10320 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10321 break;
10322 default:
10323 break;
10324 }
10325 }
10326
10327 if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
10328 dst = make_dst_temp_reg(emit->tes.outer.temp_index);
10329
10330 switch (emit->tes.prim_mode) {
10331 case PIPE_PRIM_QUADS:
10332 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10333 emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
10334 dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
10335 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10336 FALLTHROUGH;
10337 case PIPE_PRIM_TRIANGLES:
10338 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10339 emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
10340 dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
10341 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10342 FALLTHROUGH;
10343 case PIPE_PRIM_LINES:
10344 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10345 emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
10346 dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10347 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10348
10349 src = make_src_scalar_reg(TGSI_FILE_INPUT,
10350 emit->tes.outer.in_index , TGSI_SWIZZLE_X);
10351 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10352 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10353
10354 break;
10355 default:
10356 break;
10357 }
10358 }
10359 }
10360
10361
10362 static void
10363 emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
10364 {
10365 struct tgsi_full_src_register src;
10366 struct tgsi_full_dst_register dst;
10367 unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
10368 emit->initialize_temp_index);
10369 src = make_immediate_reg_float(emit, 0.0f);
10370 dst = make_dst_temp_reg(vgpu10_temp_index);
10371 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10372 emit->temp_map[emit->initialize_temp_index].initialized = TRUE;
10373 emit->initialize_temp_index = INVALID_INDEX;
10374 }
10375
10376
10377 /**
10378 * Emit any extra/helper declarations/code that we might need between
10379 * the declaration section and code section.
10380 */
10381 static boolean
10382 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
10383 {
10384 /* Properties */
10385 if (emit->unit == PIPE_SHADER_GEOMETRY)
10386 emit_property_instructions(emit);
10387 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10388 emit_hull_shader_declarations(emit);
10389
10390 /* Save the position of the first instruction token so that we can
10391 * do a second pass of the instructions for the patch constant phase.
10392 */
10393 emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
10394 emit->tcs.fork_phase_add_signature = FALSE;
10395
10396 if (!emit_hull_shader_control_point_phase(emit)) {
10397 emit->skip_instruction = TRUE;
10398 return TRUE;
10399 }
10400
10401 /* Set the current tcs phase to control point phase */
10402 emit->tcs.control_point_phase = TRUE;
10403 }
10404 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10405 emit_domain_shader_declarations(emit);
10406 }
10407
10408 /* Declare inputs */
10409 if (!emit_input_declarations(emit))
10410 return FALSE;
10411
10412 /* Declare outputs */
10413 if (!emit_output_declarations(emit))
10414 return FALSE;
10415
10416 /* Declare temporary registers */
10417 emit_temporaries_declaration(emit);
10418
10419 /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
10420 * will already be declared in hs_decls (emit_hull_shader_declarations)
10421 */
10422 if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10423 /* Declare constant registers */
10424 emit_constant_declaration(emit);
10425
10426 /* Declare samplers and resources */
10427 emit_sampler_declarations(emit);
10428 emit_resource_declarations(emit);
10429
10430 alloc_common_immediates(emit);
10431 /* The constant block containing all the immediates declared by the
10432 * shader, plus the extra ones allocated above, is emitted further below.
10433 */
10434 }
10435
10436 if (emit->unit != PIPE_SHADER_FRAGMENT) {
10437 /*
10438 * Declare clip distance output registers for ClipVertex or
10439 * user defined planes
10440 */
10441 emit_clip_distance_declarations(emit);
10442 }
10443
10444 if (emit->unit == PIPE_SHADER_FRAGMENT &&
10445 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10446 float alpha = emit->key.fs.alpha_ref;
10447 emit->fs.alpha_ref_index =
10448 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
10449 }
10450
10451 if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10452 /**
10453 * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
10454 * hs_decls
10455 */
10456 emit_vgpu10_immediates_block(emit);
10457 }
10458 else {
10459 emit_tcs_default_control_point_output(emit);
10460 }
10461
10462 if (emit->unit == PIPE_SHADER_FRAGMENT) {
10463 emit_frontface_instructions(emit);
10464 emit_fragcoord_instructions(emit);
10465 emit_sample_position_instructions(emit);
10466 emit_default_layer_instructions(emit);
10467 }
10468 else if (emit->unit == PIPE_SHADER_VERTEX) {
10469 emit_vertex_attrib_instructions(emit);
10470
10471 if (emit->info.uses_vertexid)
10472 emit_vertex_id_nobase_instruction(emit);
10473 }
10474 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10475 emit_temp_tessfactor_instructions(emit);
10476 }
10477
10478 /**
10479 * For a geometry shader that writes to the viewport index, the prescale
10480 * temporaries will be set up at the first vertex emission.
10481 */
10482 if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
10483 emit_temp_prescale_instructions(emit);
10484
10485 return TRUE;
10486 }
10487
10488
10489 /**
10490 * The device has no direct support for the pipe_blend_state::alpha_to_one
10491 * option so we implement it here with shader code.
10492 *
10493 * Note that this is kind of pointless, actually. Here we're clobbering
10494 * the alpha value with 1.0. So if alpha-to-coverage is enabled, we'll wind
10495 * up with 100% coverage. That's almost certainly not what the user wants.
10496 * The work-around is to add extra shader code to compute coverage from alpha
10497 * and write it to the coverage output register (if the user's shader doesn't
10498 * do so already). We'll probably do that in the future.
10499 */
10500 static void
10501 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
10502 unsigned fs_color_tmp_index)
10503 {
10504 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
10505 unsigned i;
10506
10507 /* Note: it's not 100% clear from the spec if we're supposed to clobber
10508 * the alpha for all render targets. But that's what NVIDIA does and
10509 * that's what Piglit tests.
10510 */
10511 for (i = 0; i < emit->fs.num_color_outputs; i++) {
10512 struct tgsi_full_dst_register color_dst;
10513
10514 if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
10515 /* write to the temp color register */
10516 color_dst = make_dst_temp_reg(fs_color_tmp_index);
10517 }
10518 else {
10519 /* write directly to the color[i] output */
10520 color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
10521 }
10522
10523 color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
10524
10525 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
10526 }
10527 }
10528
10529
10530 /**
10531 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w
10532 * against the alpha reference value and discards the fragment if the
10533 * comparison fails.
10534 */
10535 static void
10536 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
10537 unsigned fs_color_tmp_index)
10538 {
10539 /* compare output color's alpha to alpha ref and kill */
10540 unsigned tmp = get_temp_index(emit);
10541 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10542 struct tgsi_full_src_register tmp_src_x =
10543 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10544 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10545 struct tgsi_full_src_register color_src =
10546 make_src_temp_reg(fs_color_tmp_index);
10547 struct tgsi_full_src_register color_src_w =
10548 scalar_src(&color_src, TGSI_SWIZZLE_W);
10549 struct tgsi_full_src_register ref_src =
10550 make_src_immediate_reg(emit->fs.alpha_ref_index);
10551 struct tgsi_full_dst_register color_dst =
10552 make_dst_output_reg(emit->fs.color_out_index[0]);
10553
10554 assert(emit->unit == PIPE_SHADER_FRAGMENT);
10555
10556 /* dst = src0 'alpha_func' src1 */
10557 emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
10558 &color_src_w, &ref_src);
10559
10560 /* DISCARD if dst.x == 0 */
10561 begin_emit_instruction(emit);
10562 emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */
10563 emit_src_register(emit, &tmp_src_x);
10564 end_emit_instruction(emit);
10565
10566 /* If we don't need to broadcast the color below, emit the final color here.
10567 */
10568 if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
10569 /* MOV output.color, tempcolor */
10570 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10571 }
10572
10573 free_temp_indexes(emit);
10574 }
10575
10576
10577 /**
10578 * Emit instructions for writing a single color output to multiple
10579 * color buffers.
10580 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS property
10581 * is set (or when key.fs.white_fragments is true) and the number of
10582 * render targets is greater than one.
10583 * \param fs_color_tmp_index index of the temp register that holds the
10584 * color to broadcast.
10585 */
10586 static void
10587 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
10588 unsigned fs_color_tmp_index)
10589 {
10590 const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
10591 unsigned i;
10592 struct tgsi_full_src_register color_src;
10593
10594 if (emit->key.fs.white_fragments) {
10595 /* set all color outputs to white */
10596 color_src = make_immediate_reg_float(emit, 1.0f);
10597 }
10598 else {
10599 /* set all color outputs to TEMP[fs_color_tmp_index] */
10600 assert(fs_color_tmp_index != INVALID_INDEX);
10601 color_src = make_src_temp_reg(fs_color_tmp_index);
10602 }
10603
10604 assert(emit->unit == PIPE_SHADER_FRAGMENT);
10605
10606 for (i = 0; i < n; i++) {
10607 unsigned output_reg = emit->fs.color_out_index[i];
10608 struct tgsi_full_dst_register color_dst =
10609 make_dst_output_reg(output_reg);
10610
10611 /* Fill in this semantic here since we'll use it later in
10612 * emit_dst_register().
10613 */
10614 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
10615
10616 /* MOV output.color[i], tempcolor */
10617 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10618 }
10619 }
10620
10621
10622 /**
10623 * Emit extra helper code after the original shader code, but before the
10624 * last END/RET instruction.
10625 * For vertex shaders this means emitting the extra code to apply the
10626 * prescale scale/translation.
10627 */
10628 static boolean
10629 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
10630 {
10631 if (emit->unit == PIPE_SHADER_VERTEX) {
10632 emit_vertex_instructions(emit);
10633 }
10634 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
10635 const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
10636
10637 assert(!(emit->key.fs.white_fragments &&
10638 emit->key.fs.write_color0_to_n_cbufs == 0));
10639
10640 /* We no longer want emit_dst_register() to substitute the
10641 * temporary fragment color register for the real color output.
10642 */
10643 emit->fs.color_tmp_index = INVALID_INDEX;
10644
10645 if (emit->key.fs.alpha_to_one) {
10646 emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
10647 }
10648 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10649 emit_alpha_test_instructions(emit, fs_color_tmp_index);
10650 }
10651 if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
10652 emit->key.fs.white_fragments) {
10653 emit_broadcast_color_instructions(emit, fs_color_tmp_index);
10654 }
10655 }
10656 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10657 if (!emit->tcs.control_point_phase) {
10658 /* store the tessellation levels in the patch constant phase only */
10659 store_tesslevels(emit);
10660 }
10661 else {
10662 emit_clipping_instructions(emit);
10663 }
10664 }
10665 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10666 emit_vertex_instructions(emit);
10667 }
10668
10669 return TRUE;
10670 }
10671
10672
10673 /**
10674 * Translate the TGSI tokens into VGPU10 tokens.
10675 */
10676 static boolean
10677 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
10678 const struct tgsi_token *tokens)
10679 {
10680 struct tgsi_parse_context parse;
10681 boolean ret = TRUE;
10682 boolean pre_helpers_emitted = FALSE;
10683 unsigned inst_number = 0;
10684
10685 tgsi_parse_init(&parse, tokens);
10686
10687 while (!tgsi_parse_end_of_tokens(&parse)) {
10688
10689 /* Save the current tgsi token starting position */
10690 emit->cur_tgsi_token = parse.Position;
10691
10692 tgsi_parse_token(&parse);
10693
10694 switch (parse.FullToken.Token.Type) {
10695 case TGSI_TOKEN_TYPE_IMMEDIATE:
10696 ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
10697 if (!ret)
10698 goto done;
10699 break;
10700
10701 case TGSI_TOKEN_TYPE_DECLARATION:
10702 ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
10703 if (!ret)
10704 goto done;
10705 break;
10706
10707 case TGSI_TOKEN_TYPE_INSTRUCTION:
10708 if (!pre_helpers_emitted) {
10709 ret = emit_pre_helpers(emit);
10710 if (!ret)
10711 goto done;
10712 pre_helpers_emitted = TRUE;
10713 }
10714 ret = emit_vgpu10_instruction(emit, inst_number++,
10715 &parse.FullToken.FullInstruction);
10716
10717 /* Usually this applies to the TCS only. If the shader reads control
10718 * point outputs in the control point phase, we must re-emit all
10719 * instructions that write a control point output in the control point
10720 * phase so their results are also stored in temporaries.
10721 */
10722 if (emit->reemit_instruction) {
10723 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10724 ret = emit_vgpu10_instruction(emit, inst_number,
10725 &parse.FullToken.FullInstruction);
10726 }
10727 else if (emit->initialize_temp_index != INVALID_INDEX) {
10728 emit_initialize_temp_instruction(emit);
10729 emit->initialize_temp_index = INVALID_INDEX;
10730 ret = emit_vgpu10_instruction(emit, inst_number - 1,
10731 &parse.FullToken.FullInstruction);
10732 }
10733
10734 if (!ret)
10735 goto done;
10736 break;
10737
10738 case TGSI_TOKEN_TYPE_PROPERTY:
10739 ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
10740 if (!ret)
10741 goto done;
10742 break;
10743
10744 default:
10745 break;
10746 }
10747 }
10748
10749 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10750 ret = emit_hull_shader_patch_constant_phase(emit, &parse);
10751 }
10752
10753 done:
10754 tgsi_parse_free(&parse);
10755 return ret;
10756 }
10757
10758
10759 /**
10760 * Emit the first VGPU10 shader tokens.
10761 */
10762 static boolean
10763 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
10764 {
10765 VGPU10ProgramToken ptoken;
10766
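   /*
    * Token stream emitted here (sketch):
    *   [0] VGPU10ProgramToken: version + program type
    *   [1] total shader length in tokens (zero for now, patched later in
    *       emit_vgpu10_tail())
    *   SM5 hull shaders: HS_DECLS phase marker
    *   SM5: DCL_GLOBAL_FLAGS with double precision float ops enabled
    *   SM4+: a reserved NOP that may be rewritten to DCL_GLOBAL_FLAGS with
    *         refactoringAllowed in emit_vgpu10_tail()
    */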
10767 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
10768 ptoken.value = 0; /* init whole token to zero */
10769 ptoken.majorVersion = emit->version / 10;
10770 ptoken.minorVersion = emit->version % 10;
10771 ptoken.programType = translate_shader_type(emit->unit);
10772 if (!emit_dword(emit, ptoken.value))
10773 return FALSE;
10774
10775 /* Second token: total length of shader, in tokens. We can't fill this
10776 * in until we're all done. Emit zero for now.
10777 */
10778 if (!emit_dword(emit, 0))
10779 return FALSE;
10780
10781 if (emit->version >= 50) {
10782 VGPU10OpcodeToken0 token;
10783
10784 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10785 /* For hull shader, we need to start the declarations phase first before
10786 * emitting any declarations including the global flags.
10787 */
10788 token.value = 0;
10789 token.opcodeType = VGPU10_OPCODE_HS_DECLS;
10790 begin_emit_instruction(emit);
10791 emit_dword(emit, token.value);
10792 end_emit_instruction(emit);
10793 }
10794
10795 /* Emit global flags */
10796 token.value = 0; /* init whole token to zero */
10797 token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10798 token.enableDoublePrecisionFloatOps = 1; /* set bit */
10799 token.instructionLength = 1;
10800 if (!emit_dword(emit, token.value))
10801 return FALSE;
10802 }
10803
10804 if (emit->version >= 40) {
10805 VGPU10OpcodeToken0 token;
10806
10807 /* Reserved for global flag such as refactoringAllowed.
10808 * If the shader does not use the precise qualifier, we will set the
10809 * refactoringAllowed global flag; otherwise, we will leave the reserved
10810 * token to NOP.
10811 */
10812 emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
10813 token.value = 0;
10814 token.opcodeType = VGPU10_OPCODE_NOP;
10815 token.instructionLength = 1;
10816 if (!emit_dword(emit, token.value))
10817 return FALSE;
10818 }
10819
10820 return TRUE;
10821 }
10822
10823
10824 static boolean
10825 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
10826 {
10827 VGPU10ProgramToken *tokens;
10828
10829 /* Replace the second token with total shader length */
10830 tokens = (VGPU10ProgramToken *) emit->buf;
10831 tokens[1].value = emit_get_num_tokens(emit);
10832
10833 if (emit->version >= 40 && !emit->uses_precise_qualifier) {
10834 /* Replace the reserved token with the RefactoringAllowed global flag */
10835 VGPU10OpcodeToken0 *ptoken;
10836
10837 ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
10838 assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
10839 ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10840 ptoken->refactoringAllowed = 1;
10841 }
10842
10843 return TRUE;
10844 }
10845
10846
10847 /**
10848 * Modify the FS to read the BCOLORs and use the FACE register
10849 * to choose between the front/back colors.
10850 */
10851 static const struct tgsi_token *
10852 transform_fs_twoside(const struct tgsi_token *tokens)
10853 {
10854 if (0) {
10855 debug_printf("Before tgsi_add_two_side ------------------\n");
10856 tgsi_dump(tokens,0);
10857 }
10858 tokens = tgsi_add_two_side(tokens);
10859 if (0) {
10860 debug_printf("After tgsi_add_two_side ------------------\n");
10861 tgsi_dump(tokens, 0);
10862 }
10863 return tokens;
10864 }
10865
10866
10867 /**
10868 * Modify the FS to do polygon stipple.
10869 */
10870 static const struct tgsi_token *
10871 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
10872 const struct tgsi_token *tokens)
10873 {
10874 const struct tgsi_token *new_tokens;
10875 unsigned unit;
10876
10877 if (0) {
10878 debug_printf("Before pstipple ------------------\n");
10879 tgsi_dump(tokens,0);
10880 }
10881
10882 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
10883 TGSI_FILE_INPUT);
10884
10885 emit->fs.pstipple_sampler_unit = unit;
10886
10887 /* Setup texture state for stipple */
10888 emit->sampler_target[unit] = TGSI_TEXTURE_2D;
10889 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
10890 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
10891 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
10892 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
10893 emit->key.tex[unit].target = PIPE_TEXTURE_2D;
10894
10895 if (0) {
10896 debug_printf("After pstipple ------------------\n");
10897 tgsi_dump(new_tokens, 0);
10898 }
10899
10900 return new_tokens;
10901 }
10902
10903 /**
10904 * Modify the FS to support antialiased points.
10905 */
10906 static const struct tgsi_token *
10907 transform_fs_aapoint(const struct tgsi_token *tokens,
10908 int aa_coord_index)
10909 {
10910 if (0) {
10911 debug_printf("Before tgsi_add_aa_point ------------------\n");
10912 tgsi_dump(tokens,0);
10913 }
10914 tokens = tgsi_add_aa_point(tokens, aa_coord_index);
10915 if (0) {
10916 debug_printf("After tgsi_add_aa_point ------------------\n");
10917 tgsi_dump(tokens, 0);
10918 }
10919 return tokens;
10920 }
10921
10922
10923 /**
10924 * A helper function to determine the shader in the previous stage and
10925 * then call the linker function to determine the input mapping for this
10926 * shader to match the output indices from the shader in the previous stage.
10927 */
10928 static void
10929 compute_input_mapping(struct svga_context *svga,
10930 struct svga_shader_emitter_v10 *emit,
10931 enum pipe_shader_type unit)
10932 {
10933 struct svga_shader *prevShader = NULL; /* shader in the previous stage */
10934
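   /*
    * Select the shader one stage back in the pipeline:
    *   FS  <- GS, else TES, else VS
    *   GS  <- TES, else VS
    *   TES <- TCS
    *   TCS <- VS
    * A vertex shader has no previous stage, so prevShader stays NULL.
    */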
10935 if (unit == PIPE_SHADER_FRAGMENT) {
10936 prevShader = svga->curr.gs ?
10937 &svga->curr.gs->base : (svga->curr.tes ?
10938 &svga->curr.tes->base : &svga->curr.vs->base);
10939 } else if (unit == PIPE_SHADER_GEOMETRY) {
10940 prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
10941 } else if (unit == PIPE_SHADER_TESS_EVAL) {
10942 assert(svga->curr.tcs);
10943 prevShader = &svga->curr.tcs->base;
10944 } else if (unit == PIPE_SHADER_TESS_CTRL) {
10945 assert(svga->curr.vs);
10946 prevShader = &svga->curr.vs->base;
10947 }
10948
10949 if (prevShader != NULL) {
10950 svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage);
10951 emit->prevShaderInfo = &prevShader->info;
10952 }
10953 else {
10954 /**
10955 * Since the vertex shader does not need to go through the linker to
10956 * establish the input map, we need to make sure the highest input
10957 * register index is set properly here.
10958 */
10959 emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
10960 emit->info.file_max[TGSI_FILE_INPUT]);
10961 }
10962 }
10963
10964
10965 /**
10966 * Copies the shader signature info to the shader variant
10967 */
10968 static void
10969 copy_shader_signature(struct svga_shader_signature *sgn,
10970 struct svga_shader_variant *variant)
10971 {
10972 SVGA3dDXShaderSignatureHeader *header = &sgn->header;
10973
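   /*
    * The signature blob copied into the variant is packed contiguously:
    * the SVGA3dDXShaderSignatureHeader followed by the input, output and
    * patch constant entries, each an array of SVGA3dDXShaderSignatureEntry.
    */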
10974 /* Calculate the signature length */
10975 variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
10976 (header->numInputSignatures +
10977 header->numOutputSignatures +
10978 header->numPatchConstantSignatures) *
10979 sizeof(SVGA3dDXShaderSignatureEntry);
10980
10981 /* Allocate buffer for the signature info */
10982 variant->signature =
10983 (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
10984
10985 char *sgnBuf = (char *)variant->signature;
10986 unsigned sgnLen;
10987
10988 /* Copy the signature info to the shader variant structure */
10989 memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
10990 sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
10991
10992 if (header->numInputSignatures) {
10993 sgnLen =
10994 header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10995 memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
10996 sgnBuf += sgnLen;
10997 }
10998
10999 if (header->numOutputSignatures) {
11000 sgnLen =
11001 header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
11002 memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
11003 sgnBuf += sgnLen;
11004 }
11005
11006 if (header->numPatchConstantSignatures) {
11007 sgnLen =
11008 header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
11009 memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
11010 }
11011 }
11012
11013
11014 /**
11015 * This is the main entrypoint for the TGSI -> VGPU10 translator.
11016 */
11017 struct svga_shader_variant *
11018 svga_tgsi_vgpu10_translate(struct svga_context *svga,
11019 const struct svga_shader *shader,
11020 const struct svga_compile_key *key,
11021 enum pipe_shader_type unit)
11022 {
11023 struct svga_shader_variant *variant = NULL;
11024 struct svga_shader_emitter_v10 *emit;
11025 const struct tgsi_token *tokens = shader->tokens;
11026
11027 (void) make_immediate_reg_double; /* unused at this time */
11028
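   /*
    * Translation overview:
    *   1. For fragment shaders, optionally transform the TGSI tokens
    *      (two-sided lighting, polygon stipple, antialiased points).
    *   2. Scan the shader info and compute the input mapping against the
    *      outputs of the previous stage.
    *   3. Emit the VGPU10 header, translate the instructions, then patch
    *      the header in emit_vgpu10_tail().
    *   4. Package the token buffer and signature info into the variant.
    */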
11029 assert(unit == PIPE_SHADER_VERTEX ||
11030 unit == PIPE_SHADER_GEOMETRY ||
11031 unit == PIPE_SHADER_FRAGMENT ||
11032 unit == PIPE_SHADER_TESS_CTRL ||
11033 unit == PIPE_SHADER_TESS_EVAL ||
11034 unit == PIPE_SHADER_COMPUTE);
11035
11036 /* These two flags cannot be used together */
11037 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
11038
11039 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
11040 /*
11041 * Setup the code emitter
11042 */
11043 emit = alloc_emitter();
11044 if (!emit)
11045 goto done;
11046
11047 emit->unit = unit;
11048 if (svga_have_sm5(svga)) {
11049 emit->version = 50;
11050 } else if (svga_have_sm4_1(svga)) {
11051 emit->version = 41;
11052 } else {
11053 emit->version = 40;
11054 }
11055
11056 emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
11057
11058 emit->key = *key;
11059
11060 emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
11061 emit->key.gs.need_prescale ||
11062 emit->key.tes.need_prescale);
11063
11064 /* Determine how many prescale factors in the constant buffer */
11065 emit->vposition.num_prescale = 1;
11066 if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
11067 assert(emit->unit == PIPE_SHADER_GEOMETRY);
11068 emit->vposition.num_prescale = emit->key.gs.num_prescale;
11069 }
11070
11071 emit->vposition.tmp_index = INVALID_INDEX;
11072 emit->vposition.so_index = INVALID_INDEX;
11073 emit->vposition.out_index = INVALID_INDEX;
11074
11075 emit->vs.vertex_id_sys_index = INVALID_INDEX;
11076 emit->vs.vertex_id_tmp_index = INVALID_INDEX;
11077 emit->vs.vertex_id_bias_index = INVALID_INDEX;
11078
11079 emit->fs.color_tmp_index = INVALID_INDEX;
11080 emit->fs.face_input_index = INVALID_INDEX;
11081 emit->fs.fragcoord_input_index = INVALID_INDEX;
11082 emit->fs.sample_id_sys_index = INVALID_INDEX;
11083 emit->fs.sample_pos_sys_index = INVALID_INDEX;
11084 emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
11085 emit->fs.layer_input_index = INVALID_INDEX;
11086 emit->fs.layer_imm_index = INVALID_INDEX;
11087
11088 emit->gs.prim_id_index = INVALID_INDEX;
11089 emit->gs.invocation_id_sys_index = INVALID_INDEX;
11090 emit->gs.viewport_index_out_index = INVALID_INDEX;
11091 emit->gs.viewport_index_tmp_index = INVALID_INDEX;
11092
11093 emit->tcs.vertices_per_patch_index = INVALID_INDEX;
11094 emit->tcs.invocation_id_sys_index = INVALID_INDEX;
11095 emit->tcs.control_point_input_index = INVALID_INDEX;
11096 emit->tcs.control_point_addr_index = INVALID_INDEX;
11097 emit->tcs.control_point_out_index = INVALID_INDEX;
11098 emit->tcs.control_point_tmp_index = INVALID_INDEX;
11099 emit->tcs.control_point_out_count = 0;
11100 emit->tcs.inner.out_index = INVALID_INDEX;
11102 emit->tcs.inner.temp_index = INVALID_INDEX;
11103 emit->tcs.inner.tgsi_index = INVALID_INDEX;
11104 emit->tcs.outer.out_index = INVALID_INDEX;
11105 emit->tcs.outer.temp_index = INVALID_INDEX;
11106 emit->tcs.outer.tgsi_index = INVALID_INDEX;
11107 emit->tcs.patch_generic_out_count = 0;
11108 emit->tcs.patch_generic_out_index = INVALID_INDEX;
11109 emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
11110 emit->tcs.prim_id_index = INVALID_INDEX;
11111
11112 emit->tes.tesscoord_sys_index = INVALID_INDEX;
11113 emit->tes.inner.in_index = INVALID_INDEX;
11114 emit->tes.inner.temp_index = INVALID_INDEX;
11115 emit->tes.inner.tgsi_index = INVALID_INDEX;
11116 emit->tes.outer.in_index = INVALID_INDEX;
11117 emit->tes.outer.temp_index = INVALID_INDEX;
11118 emit->tes.outer.tgsi_index = INVALID_INDEX;
11119 emit->tes.prim_id_index = INVALID_INDEX;
11120
11121 emit->clip_dist_out_index = INVALID_INDEX;
11122 emit->clip_dist_tmp_index = INVALID_INDEX;
11123 emit->clip_dist_so_index = INVALID_INDEX;
11124 emit->clip_vertex_out_index = INVALID_INDEX;
11125 emit->clip_vertex_tmp_index = INVALID_INDEX;
11126 emit->svga_debug_callback = svga->debug.callback;
11127
11128 emit->index_range.start_index = INVALID_INDEX;
11129 emit->index_range.count = 0;
11130 emit->index_range.required = FALSE;
11131 emit->index_range.operandType = VGPU10_NUM_OPERANDS;
11132 emit->index_range.dim = 0;
11133 emit->index_range.size = 0;
11134
11135 emit->current_loop_depth = 0;
11136
11137 emit->initialize_temp_index = INVALID_INDEX;
11138
11139 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
11140 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
11141 }
11142
11143 if (unit == PIPE_SHADER_FRAGMENT) {
11144 if (key->fs.light_twoside) {
11145 tokens = transform_fs_twoside(tokens);
11146 }
11147 if (key->fs.pstipple) {
11148 const struct tgsi_token *new_tokens =
11149 transform_fs_pstipple(emit, tokens);
11150 if (tokens != shader->tokens) {
11151 /* free the two-sided shader tokens */
11152 tgsi_free_tokens(tokens);
11153 }
11154 tokens = new_tokens;
11155 }
11156 if (key->fs.aa_point) {
11157 tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
11158 }
11159 }
11160
11161 if (SVGA_DEBUG & DEBUG_TGSI) {
11162 debug_printf("#####################################\n");
11163 debug_printf("### TGSI Shader %u\n", shader->id);
11164 tgsi_dump(tokens, 0);
11165 }
11166
11167 /**
11168 * Rescan the header if the token string is different from the one
11169 * included in the shader; otherwise, the header info is already up-to-date
11170 */
11171 if (tokens != shader->tokens) {
11172 tgsi_scan_shader(tokens, &emit->info);
11173 } else {
11174 emit->info = shader->info;
11175 }
11176
11177 emit->num_outputs = emit->info.num_outputs;
11178
11179 /**
11180 * Compute input mapping to match the outputs from shader
11181 * in the previous stage
11182 */
11183 compute_input_mapping(svga, emit, unit);
11184
11185 determine_clipping_mode(emit);
11186
11187 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
11188 unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
11189 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
11190 /* If there are stream output declarations associated
11191 * with this shader, or the shader writes to ClipDistance,
11192 * then reserve extra registers for the non-adjusted vertex position
11193 * and the ClipDistance shadow copy.
11194 */
11195 emit->vposition.so_index = emit->num_outputs++;
11196
11197 if (emit->clip_mode == CLIP_DISTANCE) {
11198 emit->clip_dist_so_index = emit->num_outputs++;
11199 if (emit->info.num_written_clipdistance > 4)
11200 emit->num_outputs++;
11201 }
11202 }
11203 }
11204
11205 /*
11206 * Do actual shader translation.
11207 */
11208 if (!emit_vgpu10_header(emit)) {
11209 debug_printf("svga: emit VGPU10 header failed\n");
11210 goto cleanup;
11211 }
11212
11213 if (!emit_vgpu10_instructions(emit, tokens)) {
11214 debug_printf("svga: emit VGPU10 instructions failed\n");
11215 goto cleanup;
11216 }
11217
11218 if (!emit_vgpu10_tail(emit)) {
11219 debug_printf("svga: emit VGPU10 tail failed\n");
11220 goto cleanup;
11221 }
11222
11223 if (emit->register_overflow) {
11224 goto cleanup;
11225 }
11226
11227 /*
11228 * Create, initialize the 'variant' object.
11229 */
11230 variant = svga_new_shader_variant(svga, unit);
11231 if (!variant)
11232 goto cleanup;
11233
11234 variant->shader = shader;
11235 variant->nr_tokens = emit_get_num_tokens(emit);
11236 variant->tokens = (const unsigned *)emit->buf;
11237
11238 /* Copy shader signature info to the shader variant */
11239 if (svga_have_sm5(svga)) {
11240 copy_shader_signature(&emit->signature, variant);
11241 }
11242
11243 emit->buf = NULL; /* buffer is no longer owned by the emitter context */
11244 memcpy(&variant->key, key, sizeof(*key));
11245 variant->id = UTIL_BITMASK_INVALID_INDEX;
11246
11247 /* The extra constant starting offset starts with the number of
11248 * shader constants declared in the shader.
11249 */
11250 variant->extra_const_start = emit->num_shader_consts[0];
11251 if (key->gs.wide_point) {
11252 /**
11253 * The extra constant added in the transformed shader
11254 * for inverse viewport scale is to be supplied by the driver.
11255 * So the extra constant starting offset needs to be reduced by 1.
11256 */
11257 assert(variant->extra_const_start > 0);
11258 variant->extra_const_start--;
11259 }
11260
11261 if (unit == PIPE_SHADER_FRAGMENT) {
11262 struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
11263
11264 fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
11265
11266 /* If there was exactly one write to a fragment shader output register
11267 * and it came from a constant buffer, we know all fragments will have
11268 * the same color (except for blending).
11269 */
11270 fs_variant->constant_color_output =
11271 emit->constant_color_output && emit->num_output_writes == 1;
11272
11273 /** keep track in the variant if flat interpolation is used
11274 * for any of the varyings.
11275 */
11276 fs_variant->uses_flat_interp = emit->uses_flat_interp;
11277
11278 fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
11279 }
11280 else if (unit == PIPE_SHADER_TESS_EVAL) {
11281 struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
11282
11283 /* Keep track in the tes variant some of the layout parameters.
11284 * These parameters will be referenced by the tcs to emit
11285 * the necessary declarations for the hull shader.
11286 */
11287 tes_variant->prim_mode = emit->tes.prim_mode;
11288 tes_variant->spacing = emit->tes.spacing;
11289 tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
11290 tes_variant->point_mode = emit->tes.point_mode;
11291 }
11292
11293
11294 if (tokens != shader->tokens) {
11295 tgsi_free_tokens(tokens);
11296 }
11297
11298 cleanup:
11299 free_emitter(emit);
11300
11301 done:
11302 SVGA_STATS_TIME_POP(svga_sws(svga));
11303 return variant;
11304 }
11305