1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * Copyright 2007-2008 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * @file
31  * TGSI to LLVM IR translation -- SoA.
32  *
33  * @author Jose Fonseca <jfonseca@vmware.com>
34  *
35  * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36  * Brian Paul, and others.
37  */
38 
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_misc.h"
61 #include "lp_bld_swizzle.h"
62 #include "lp_bld_flow.h"
63 #include "lp_bld_coro.h"
64 #include "lp_bld_quad.h"
65 #include "lp_bld_tgsi.h"
66 #include "lp_bld_limits.h"
67 #include "lp_bld_debug.h"
68 #include "lp_bld_printf.h"
69 #include "lp_bld_sample.h"
70 #include "lp_bld_struct.h"
71 
72 #define DUMP_GS_EMITS 0
73 
74 /*
75  * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
76  * instruction.
77  *
78  * TODO:
79  * - take execution masks into consideration
80  * - debug control-flow instructions
81  */
82 #define DEBUG_EXECUTION 0
83 
84 
85 /*
86  * Emit code to print a register value.
87  */
88 static void
89 emit_dump_reg(struct gallivm_state *gallivm,
90               unsigned file,
91               unsigned index,
92               unsigned chan,
93               LLVMValueRef value)
94 {
95    char buf[32];
96 
97    snprintf(buf, sizeof buf, "    %s[%u].%c = ",
98             tgsi_file_name(file),
99             index, "xyzw"[chan]);
100 
101    lp_build_print_value(gallivm, buf, value);
102 }
103 
104 static inline struct function_ctx *
105 func_ctx(struct lp_exec_mask *mask)
106 {
107    assert(mask->function_stack_size > 0);
108    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
109    return &mask->function_stack[mask->function_stack_size - 1];
110 }
111 
112 /*
113  * Combine the execution mask, if there is one, with the current mask.
114  */
115 static LLVMValueRef
116 mask_vec(struct lp_build_tgsi_context *bld_base)
117 {
118    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
119    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
120    struct lp_exec_mask *exec_mask = &bld->exec_mask;
121    LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
122    if (!exec_mask->has_mask) {
123       return bld_mask;
124    }
125    if (!bld_mask)
126       return exec_mask->exec_mask;
127    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
128                        exec_mask->exec_mask, "");
129 }
130 
131 static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
132                           struct lp_build_tgsi_context * bld_base)
133 {
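   /*
    * Peek at the instruction that follows this BRK: if it is CASE or
    * ENDSWITCH, the break ends the case block for every channel that reaches
    * it, so it is passed to lp_exec_break() as an unconditional break
    * (break_always).
    */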
134    enum tgsi_opcode opcode =
135       bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
136    bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
137                         opcode == TGSI_OPCODE_CASE);
138    lp_exec_break(mask, &bld_base->pc, break_always);
139 }
140 
141 static void lp_exec_switch(struct lp_exec_mask *mask,
142                            LLVMValueRef switchval)
143 {
144    struct function_ctx *ctx = func_ctx(mask);
145 
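   /*
    * Beyond the nesting limit we emit no masking code for this switch; only
    * keep the stack depth counter balanced so the matching endswitch can
    * unwind it again.
    */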
146    if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
147        ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
148       ctx->switch_stack_size++;
149       return;
150    }
151 
152    ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
153       ctx->break_type;
154    ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
155 
156    ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
157    ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
158    ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
159    ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
160    ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
161    ctx->switch_stack_size++;
162 
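   /*
    * Start the new switch with an all-zero execution mask; case/default
    * statements will enable the channels whose value matches.
    */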
163    mask->switch_mask = LLVMConstNull(mask->int_vec_type);
164    ctx->switch_val = switchval;
165    ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
166    ctx->switch_in_default = false;
167    ctx->switch_pc = 0;
168 
169    lp_exec_mask_update(mask);
170 }
171 
172 static void lp_exec_endswitch(struct lp_exec_mask *mask,
173                               struct lp_build_tgsi_context * bld_base)
174 {
175    LLVMBuilderRef builder = mask->bld->gallivm->builder;
176    struct function_ctx *ctx = func_ctx(mask);
177 
178    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
179       ctx->switch_stack_size--;
180       return;
181    }
182 
183    /* check if there's a deferred default; if so, handle it now */
184    if (ctx->switch_pc && !ctx->switch_in_default) {
185       LLVMValueRef prevmask, defaultmask;
186       unsigned tmp_pc;
187       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
188       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
189       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
190       ctx->switch_in_default = true;
191 
192       lp_exec_mask_update(mask);
193 
194       assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
195              TGSI_OPCODE_DEFAULT);
196 
197       tmp_pc = bld_base->pc;
198       bld_base->pc = ctx->switch_pc;
199       /*
200        * re-purpose switch_pc to point to here again, since we stop execution of
201        * the deferred default after the next break.
202        */
203       ctx->switch_pc = tmp_pc - 1;
204 
205       return;
206    }
207 
208    else if (ctx->switch_pc && ctx->switch_in_default) {
209       assert(bld_base->pc == ctx->switch_pc + 1);
210    }
211 
212    ctx->switch_stack_size--;
213    mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
214    ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
215    ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
216    ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
217    ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
218 
219    ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
220 
221    lp_exec_mask_update(mask);
222 }
223 
224 static void lp_exec_case(struct lp_exec_mask *mask,
225                          LLVMValueRef caseval)
226 {
227    LLVMBuilderRef builder = mask->bld->gallivm->builder;
228    struct function_ctx *ctx = func_ctx(mask);
229 
230    LLVMValueRef casemask, prevmask;
231 
232    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
233       return;
234    }
235 
236    /* Skipping the case mask evaluation while in default is NOT optional (not in all cases anyway). */
237    if (!ctx->switch_in_default) {
238       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
239       casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
240       ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
241                                              ctx->switch_mask_default, "sw_default_mask");
242       casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
243       mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
244 
245       lp_exec_mask_update(mask);
246    }
247 }
248 
249 /*
250  * Analyse default statement in a switch.
251  * \return true if default is the last statement, false otherwise
252  * \param default_pc_start receives the pc of the instruction to jump to
253  *                         if default wasn't last but there's no
254  *                         fallthrough into default.
255  */
256 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
257                                        struct lp_build_tgsi_context * bld_base,
258                                        int *default_pc_start)
259 {
260    unsigned pc = bld_base->pc;
261    struct function_ctx *ctx = func_ctx(mask);
262    int curr_switch_stack = ctx->switch_stack_size;
263 
264    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
265       return false;
266    }
267 
268    /* skip over case statements which are together with default */
269    while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
270       pc++;
271    }
272 
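   /*
    * Scan forward for the next CASE or ENDSWITCH at this nesting level:
    * ENDSWITCH means default was the last label, CASE means it was not and
    * also gives us the pc to resume execution at.
    */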
273    while (pc != ~0u && pc < bld_base->num_instructions) {
274       enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
275       switch (opcode) {
276       case TGSI_OPCODE_CASE:
277          if (curr_switch_stack == ctx->switch_stack_size) {
278             *default_pc_start = pc - 1;
279             return false;
280          }
281          break;
282       case TGSI_OPCODE_SWITCH:
283          curr_switch_stack++;
284          break;
285       case TGSI_OPCODE_ENDSWITCH:
286          if (curr_switch_stack == ctx->switch_stack_size) {
287             *default_pc_start = pc - 1;
288             return true;
289          }
290          curr_switch_stack--;
291          break;
292       default:
293          ; /* nothing */
294       }
295       pc++;
296    }
297    /* should never arrive here */
298    assert(0);
299    return true;
300 }
301 
302 static void lp_exec_default(struct lp_exec_mask *mask,
303                             struct lp_build_tgsi_context * bld_base)
304 {
305    LLVMBuilderRef builder = mask->bld->gallivm->builder;
306    struct function_ctx *ctx = func_ctx(mask);
307 
308    int default_exec_pc = 0;
309    boolean default_is_last;
310 
311    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
312       return;
313    }
314 
315    /*
316     * This is a messy opcode, because it may not be always at the end and
317     * there can be fallthrough in and out of it.
318     */
319 
320    default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
321    /*
322     * If it is the last statement in the switch (note that case statements
323     * appearing "at the same time" as default don't change that), everything is
324     * just fine: update the switch mask and go on. This means we can handle default with
325     * fallthrough INTO it without overhead, if it is last.
326     */
327    if (default_is_last) {
328       LLVMValueRef prevmask, defaultmask;
329       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
330       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
331       defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
332       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
333       ctx->switch_in_default = true;
334 
335       lp_exec_mask_update(mask);
336    }
337    else {
338       /*
339        * Technically, a "case" immediately before default isn't really a
340        * fallthrough; however, we still have to count it as such since we
341        * have already updated the masks.
342        * If that happens in practice we could add a switch optimizer pass
343        * which just gets rid of all case statements appearing together with
344        * default (or do the switch analysis at switch start time instead).
345        */
346       enum tgsi_opcode opcode =
347          bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
348       boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
349                          opcode != TGSI_OPCODE_SWITCH);
350       /*
351        * If it is not the last statement and there was no fallthrough into it,
352        * we record the PC and continue execution at the next case (again, cases
353        * encountered at the same time don't count). At endswitch
354        * time, we update the switch mask and go back to executing the code we skipped
355        * until the next break (possibly re-executing some code with changed mask
356        * if there was a fallthrough out of default).
357        * Finally, if it is not the last statement and there was a fallthrough into it,
358        * do the same as with the former case, except instead of skipping the code
359        * just execute it without updating the mask, then go back and re-execute.
360        */
361       ctx->switch_pc = bld_base->pc;
362       if (!ft_into) {
363          bld_base->pc = default_exec_pc;
364       }
365    }
366 }
367 
368 
369 static void lp_exec_mask_call(struct lp_exec_mask *mask,
370                               int func,
371                               int *pc)
372 {
373    if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
374       return;
375    }
376 
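   /*
    * Push the current return pc and return mask, then transfer control to the
    * callee by rewriting *pc.
    */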
377    lp_exec_mask_function_init(mask, mask->function_stack_size);
378    mask->function_stack[mask->function_stack_size].pc = *pc;
379    mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
380    mask->function_stack_size++;
381    *pc = func;
382 }
383 
384 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
385 {
386    LLVMBuilderRef builder = mask->bld->gallivm->builder;
387    struct function_ctx *ctx = func_ctx(mask);
388    LLVMValueRef exec_mask;
389 
390    if (ctx->cond_stack_size == 0 &&
391        ctx->loop_stack_size == 0 &&
392        ctx->switch_stack_size == 0 &&
393        mask->function_stack_size == 1) {
394       /* returning from main() */
395       *pc = -1;
396       return;
397    }
398 
399    if (mask->function_stack_size == 1) {
400       /*
401        * This requires special handling since we need to ensure
402        * we don't drop the mask even if we have no call stack
403        * (e.g. after a ret in an if clause, after the endif)
404        */
405       mask->ret_in_main = TRUE;
406    }
407 
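   /*
    * Channels that execute this RET are cleared from ret_mask and therefore
    * stay inactive until the function returns.
    */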
408    exec_mask = LLVMBuildNot(builder,
409                             mask->exec_mask,
410                             "ret");
411 
412    mask->ret_mask = LLVMBuildAnd(builder,
413                                  mask->ret_mask,
414                                  exec_mask, "ret_full");
415 
416    lp_exec_mask_update(mask);
417 }
418 
419 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
420 {
421 }
422 
423 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424 {
425    struct function_ctx *ctx;
426 
427    assert(mask->function_stack_size > 1);
428    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
429 
430    ctx = func_ctx(mask);
431    mask->function_stack_size--;
432 
433    *pc = ctx->pc;
434    mask->ret_mask = ctx->ret_mask;
435 
436    lp_exec_mask_update(mask);
437 }
438 
439 
440 static LLVMValueRef
441 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
442              unsigned file,
443              int index,
444              unsigned chan)
445 {
446    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
447    LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
448    LLVMValueRef var_of_array;
449 
450    switch (file) {
451    case TGSI_FILE_TEMPORARY:
452       array_of_vars = bld->temps;
453       var_of_array = bld->temps_array;
454       break;
455    case TGSI_FILE_OUTPUT:
456       array_of_vars = bld->outputs;
457       var_of_array = bld->outputs_array;
458       break;
459    default:
460       assert(0);
461       return NULL;
462    }
463 
464    assert(chan < 4);
465 
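   /*
    * Indirectly addressed files live in one flat alloca; compute the scalar
    * slot (index * 4 + chan) and GEP to it. Otherwise each register channel
    * has its own variable.
    */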
466    if (bld->indirect_files & (1 << file)) {
467       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
468       if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
469          LLVMValueRef gep[2];
470          gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
471          gep[1] = lindex;
472          return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
473       } else {
474          return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
475       }
476    }
477    else {
478       assert(index <= bld->bld_base.info->file_max[file]);
479       return array_of_vars[index][chan];
480    }
481 }
482 
483 
484 /**
485  * Return pointer to a temporary register channel (src or dest).
486  * Note that indirect addressing cannot be handled here.
487  * \param index  which temporary register
488  * \param chan  which channel of the temp register.
489  */
490 LLVMValueRef
491 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
492              unsigned index,
493              unsigned chan)
494 {
495    return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
496 }
497 
498 /**
499  * Return pointer to an output register channel (src or dest).
500  * Note that indirect addressing cannot be handled here.
501  * \param index  which output register
502  * \param chan  which channel of the output register.
503  */
504 LLVMValueRef
505 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
506                unsigned index,
507                unsigned chan)
508 {
509    return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
510 }
511 
512 /*
513  * If we have indirect addressing in outputs, copy our alloca array
514  * to the output slots specified by the caller, to make sure
515  * our outputs are delivered consistently via the same interface.
516  */
517 static void
518 gather_outputs(struct lp_build_tgsi_soa_context * bld)
519 {
520    if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
521       unsigned index, chan;
522       assert(bld->bld_base.info->num_outputs <=
523              bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
524       for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
525          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
526             bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
527          }
528       }
529    }
530 }
531 
532 /**
533  * Gather vector.
534  * XXX the lp_build_gather() function should be capable of doing this
535  * with a little work.
536  */
537 static LLVMValueRef
538 build_gather(struct lp_build_tgsi_context *bld_base,
539              LLVMValueRef base_ptr,
540              LLVMValueRef indexes,
541              LLVMValueRef overflow_mask,
542              LLVMValueRef indexes2)
543 {
544    struct gallivm_state *gallivm = bld_base->base.gallivm;
545    LLVMBuilderRef builder = gallivm->builder;
546    struct lp_build_context *uint_bld = &bld_base->uint_bld;
547    struct lp_build_context *bld = &bld_base->base;
548    LLVMValueRef res;
549    unsigned i;
550 
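   /*
    * indexes2 is only non-NULL for 64-bit fetches: we then gather twice as
    * many 32-bit elements, with even result slots taken from 'indexes' and
    * odd ones from 'indexes2'.
    */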
551    if (indexes2)
552       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
553    else
554       res = bld->undef;
555    /*
556     * overflow_mask is a vector telling us which channels
557     * in the vector overflowed. We use the overflow behavior for
558     * constant buffers which is defined as:
559     * Out of bounds access to constant buffer returns 0 in all
560     * components. Out of bounds behavior is always with respect
561     * to the size of the buffer bound at that slot.
562     */
563 
564    if (overflow_mask) {
565       /*
566        * We avoid per-element control flow here (also due to llvm going crazy,
567        * though I suspect it's better anyway since overflow is likely rare).
568        * Note that since we still fetch from buffers even if num_elements was
569        * zero (in this case we'll fetch from index zero) the jit func callers
570        * MUST provide valid fake constant buffers of size 4x32 (the values do
571        * not matter), otherwise we'd still need (not per element though)
572        * control flow.
573        */
574       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
575       if (indexes2)
576          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
577    }
578 
579    /*
580     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
581     */
582    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
583       LLVMValueRef si, di;
584       LLVMValueRef index;
585       LLVMValueRef scalar_ptr, scalar;
586 
587       di = lp_build_const_int32(bld->gallivm, i);
588       if (indexes2)
589          si = lp_build_const_int32(bld->gallivm, i >> 1);
590       else
591          si = di;
592 
593       if (indexes2 && (i & 1)) {
594          index = LLVMBuildExtractElement(builder,
595                                          indexes2, si, "");
596       } else {
597          index = LLVMBuildExtractElement(builder,
598                                          indexes, si, "");
599       }
600       scalar_ptr = LLVMBuildGEP(builder, base_ptr,
601                                 &index, 1, "gather_ptr");
602       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
603 
604       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
605    }
606 
607    if (overflow_mask) {
608       if (indexes2) {
609          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
610          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
611                                        bld_base->dbl_bld.int_vec_type, "");
612          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
613                                bld_base->dbl_bld.zero, res);
614       } else
615          res = lp_build_select(bld, overflow_mask, bld->zero, res);
616    }
617 
618    return res;
619 }
620 
621 
622 /**
623  * Scatter/store vector.
624  */
625 static void
626 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
627                   LLVMValueRef base_ptr,
628                   LLVMValueRef indexes,
629                   LLVMValueRef values,
630                   struct lp_exec_mask *mask)
631 {
632    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
633    LLVMBuilderRef builder = gallivm->builder;
634    unsigned i;
635    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
636 
637    /*
638     * Loop over elements of index_vec, store scalar value.
639     */
640    for (i = 0; i < bld->bld_base.base.type.length; i++) {
641       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
642       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
643       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
644       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
645       LLVMValueRef scalar_pred = pred ?
646          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
647 
648       if (0)
649          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
650                          ii, val, index, scalar_ptr);
651 
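      /*
       * Emulate a predicated store: load the current destination value,
       * select between the new and old value per the predicate, and store
       * the result back.
       */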
652       if (scalar_pred) {
653          LLVMValueRef real_val, dst_val;
654          dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
655          real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
656          LLVMBuildStore(builder, real_val, scalar_ptr);
657       }
658       else {
659          LLVMBuildStore(builder, val, scalar_ptr);
660       }
661    }
662 }
663 
664 
665 /**
666  * Read the current value of the ADDR register, convert the floats to
667  * ints, add the base index and return the vector of offsets.
668  * The offsets will be used to index into the constant buffer or
669  * temporary register file.
670  */
671 static LLVMValueRef
672 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
673                    unsigned reg_file, unsigned reg_index,
674                    const struct tgsi_ind_register *indirect_reg,
675                    int index_limit)
676 {
677    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
678    struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
679    /* use the component of the address register selected by the indirect register's swizzle */
680    unsigned swizzle = indirect_reg->Swizzle;
681    LLVMValueRef base;
682    LLVMValueRef rel;
683    LLVMValueRef max_index;
684    LLVMValueRef index;
685 
686    assert(bld->indirect_files & (1 << reg_file));
687 
688    base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
689 
690    assert(swizzle < 4);
691    switch (indirect_reg->File) {
692    case TGSI_FILE_ADDRESS:
693       rel = LLVMBuildLoad(builder,
694                           bld->addr[indirect_reg->Index][swizzle],
695                           "load addr reg");
696       /* ADDR LLVM values already have LLVM integer type. */
697       break;
698    case TGSI_FILE_TEMPORARY:
699       rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
700       rel = LLVMBuildLoad(builder, rel, "load temp reg");
701       /* TEMP LLVM values always have LLVM float type, but for indirection, the
702        * value actually stored is expected to be an integer */
703       rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
704       break;
705    default:
706       assert(0);
707       rel = uint_bld->zero;
708    }
709 
710    index = lp_build_add(uint_bld, base, rel);
711 
712    /*
713     * emit_fetch_constant handles constant buffer overflow so this code
714     * is pointless for them.
715     * Furthermore the D3D10 spec in section 6.5 says:
716     * If the constant buffer bound to a slot is larger than the size
717     * declared in the shader for that slot, implementations are allowed
718     * to return incorrect data (not necessarily 0) for indices that are
719     * larger than the declared size but smaller than the buffer size.
720     */
721    if (reg_file != TGSI_FILE_CONSTANT) {
722       assert(index_limit >= 0);
723       max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
724                                          uint_bld->type, index_limit);
725 
726       assert(!uint_bld->type.sign);
727       index = lp_build_min(uint_bld, index, max_index);
728    }
729 
730    return index;
731 }
732 
733 static struct lp_build_context *
734 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
735 	       enum tgsi_opcode_type stype)
736 {
737    struct lp_build_context *bld_fetch;
738 
739    switch (stype) {
740    case TGSI_TYPE_FLOAT:
741    case TGSI_TYPE_UNTYPED:
742       bld_fetch = &bld_base->base;
743       break;
744    case TGSI_TYPE_UNSIGNED:
745       bld_fetch = &bld_base->uint_bld;
746       break;
747    case TGSI_TYPE_SIGNED:
748       bld_fetch = &bld_base->int_bld;
749       break;
750    case TGSI_TYPE_DOUBLE:
751       bld_fetch = &bld_base->dbl_bld;
752       break;
753    case TGSI_TYPE_UNSIGNED64:
754       bld_fetch = &bld_base->uint64_bld;
755       break;
756    case TGSI_TYPE_SIGNED64:
757       bld_fetch = &bld_base->int64_bld;
758       break;
759    case TGSI_TYPE_VOID:
760    default:
761       assert(0);
762       bld_fetch = NULL;
763       break;
764    }
765    return bld_fetch;
766 }
767 
768 static LLVMValueRef
769 get_soa_array_offsets(struct lp_build_context *uint_bld,
770                       LLVMValueRef indirect_index,
771                       unsigned chan_index,
772                       boolean need_perelement_offset)
773 {
774    struct gallivm_state *gallivm = uint_bld->gallivm;
775    LLVMValueRef chan_vec =
776       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
777    LLVMValueRef length_vec =
778       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
779    LLVMValueRef index_vec;
780 
781    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
782    index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
783    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
784    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
785 
786    if (need_perelement_offset) {
787       LLVMValueRef pixel_offsets;
788       unsigned i;
789       /* build pixel offset vector: {0, 1, 2, 3, ...} */
790       pixel_offsets = uint_bld->undef;
791       for (i = 0; i < uint_bld->type.length; i++) {
792          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
793          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
794                                                 ii, ii, "");
795       }
796       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
797    }
798    return index_vec;
799 }
800 
801 static LLVMValueRef
802 emit_fetch_constant(
803    struct lp_build_tgsi_context * bld_base,
804    const struct tgsi_full_src_register * reg,
805    enum tgsi_opcode_type stype,
806    unsigned swizzle_in)
807 {
808    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
809    struct gallivm_state *gallivm = bld_base->base.gallivm;
810    LLVMBuilderRef builder = gallivm->builder;
811    struct lp_build_context *uint_bld = &bld_base->uint_bld;
812    unsigned dimension = 0;
813    LLVMValueRef consts_ptr;
814    LLVMValueRef num_consts;
815    LLVMValueRef res;
816    unsigned swizzle = swizzle_in & 0xffff;
817 
818    /* XXX: Handle fetching xyzw components as a vector */
819    assert(swizzle != ~0u);
820 
821    if (reg->Register.Dimension) {
822       assert(!reg->Dimension.Indirect);
823       dimension = reg->Dimension.Index;
824       assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
825    }
826 
827    consts_ptr = bld->consts[dimension];
828    num_consts = bld->consts_sizes[dimension];
829 
830    if (reg->Register.Indirect) {
831       LLVMValueRef indirect_index;
832       LLVMValueRef swizzle_vec =
833          lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
834       LLVMValueRef index_vec;  /* index into the const buffer */
835       LLVMValueRef overflow_mask;
836       LLVMValueRef index_vec2 = NULL;
837 
838       indirect_index = get_indirect_index(bld,
839                                           reg->Register.File,
840                                           reg->Register.Index,
841                                           &reg->Indirect,
842                                           bld->bld_base.info->file_max[reg->Register.File]);
843 
844       /* All fetches are from the same constant buffer, so
845        * we need to propagate the size to a vector to do a
846        * vector comparison */
847       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
848       /* Construct a boolean vector telling us which channels
849        * overflow the bound constant buffer */
850       overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
851                                        indirect_index, num_consts);
852 
853       /* index_vec = indirect_index * 4 + swizzle */
854       index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
855       index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
856 
857       if (tgsi_type_is_64bit(stype)) {
858          LLVMValueRef swizzle_vec2;
859          swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
860          index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
861          index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
862       }
863       /* Gather values from the constant buffer */
864       res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
865    }
866    else {
867       LLVMValueRef index;  /* index into the const buffer */
868       LLVMValueRef scalar, scalar_ptr;
869       struct lp_build_context *bld_broad = &bld_base->base;
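      /* Constants are laid out as scalars: element (reg index * 4 + swizzle). */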
870       index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
871 
872       scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
873                                 &index, 1, "");
874 
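      /*
       * A 64-bit value normally occupies two adjacent channels; if the second
       * swizzle is not swizzle + 1 the two 32-bit halves are loaded separately
       * and spliced together instead of being broadcast as one 64-bit load.
       */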
875       if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
876 
877          LLVMValueRef scalar2, scalar2_ptr;
878          LLVMValueRef shuffles[2];
879          index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
880 
881          scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
882                                     &index, 1, "");
883 
884          scalar = LLVMBuildLoad(builder, scalar_ptr, "");
885          scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
886          shuffles[0] = lp_build_const_int32(gallivm, 0);
887          shuffles[1] = lp_build_const_int32(gallivm, 1);
888 
889          res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
890          res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
891          res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
892       } else {
893         if (stype == TGSI_TYPE_DOUBLE) {
894            LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
895            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
896            bld_broad = &bld_base->dbl_bld;
897         } else if (stype == TGSI_TYPE_UNSIGNED64) {
898            LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
899            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
900            bld_broad = &bld_base->uint64_bld;
901         } else if (stype == TGSI_TYPE_SIGNED64) {
902            LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
903            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
904            bld_broad = &bld_base->int64_bld;
905         }
906         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
907         res = lp_build_broadcast_scalar(bld_broad, scalar);
908       }
909 
910    }
911 
912    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
913       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
914       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
915    }
916 
917    return res;
918 }
919 
920 /**
921  * Fetch 64-bit values from two separate channels.
922  * 64-bit values are stored split across two channels, like xy and zw.
923  * This function creates a set of vec_length*2 floats,
924  * extracts the values from the two channels,
925  * puts them in the correct place, then casts to vec_length 64-bits.
926  */
927 static LLVMValueRef
928 emit_fetch_64bit(
929    struct lp_build_tgsi_context * bld_base,
930    enum tgsi_opcode_type stype,
931    LLVMValueRef input,
932    LLVMValueRef input2)
933 {
934    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
935    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
936    LLVMBuilderRef builder = gallivm->builder;
937    LLVMValueRef res;
938    struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
939    int i;
940    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
941    int len = bld_base->base.type.length * 2;
942    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
943 
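   /*
    * Build an interleaving shuffle mask {0, len, 1, len+1, ...} so element k
    * of 'input' is paired with element k of 'input2'; the final bitcast then
    * reinterprets each 32-bit pair as one 64-bit value.
    */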
944    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
945       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
946       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
947    }
948    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
949 
950    return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
951 }
952 
953 static LLVMValueRef
954 emit_fetch_immediate(
955    struct lp_build_tgsi_context * bld_base,
956    const struct tgsi_full_src_register * reg,
957    enum tgsi_opcode_type stype,
958    unsigned swizzle_in)
959 {
960    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
961    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
962    LLVMBuilderRef builder = gallivm->builder;
963    LLVMValueRef res = NULL;
964    unsigned swizzle = swizzle_in & 0xffff;
965 
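   /*
    * Immediates either live in per-channel SSA values or, when an indirectly
    * addressed immediate array is in use, in an alloca'd array indexed like
    * the other register files.
    */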
966    if (bld->use_immediates_array || reg->Register.Indirect) {
967       LLVMValueRef imms_array;
968       LLVMTypeRef fptr_type;
969 
970       /* cast imms_array pointer to float* */
971       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
972       imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
973 
974       if (reg->Register.Indirect) {
975          LLVMValueRef indirect_index;
976          LLVMValueRef index_vec;  /* index into the immediate register array */
977          LLVMValueRef index_vec2 = NULL;
978          indirect_index = get_indirect_index(bld,
979                                              reg->Register.File,
980                                              reg->Register.Index,
981                                              &reg->Indirect,
982                                              bld->bld_base.info->file_max[reg->Register.File]);
983          /*
984           * Unlike for other reg classes, adding pixel offsets is unnecessary -
985           * immediates are stored as full vectors (FIXME??? - might be better
986           * to store them the same as constants) but all elements are the same
987           * in any case.
988           */
989          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
990                                            indirect_index,
991                                            swizzle,
992                                            FALSE);
993          if (tgsi_type_is_64bit(stype))
994             index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
995                                               indirect_index,
996                                               swizzle_in >> 16,
997                                               FALSE);
998          /* Gather values from the immediate register array */
999          res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1000       } else {
1001          LLVMValueRef gep[2];
1002          gep[0] = lp_build_const_int32(gallivm, 0);
1003          gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1004          LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
1005                                               bld->imms_array, gep, 2, "");
1006          res = LLVMBuildLoad(builder, imms_ptr, "");
1007 
1008          if (tgsi_type_is_64bit(stype)) {
1009             LLVMValueRef imms_ptr2;
1010             LLVMValueRef res2;
1011             gep[1] = lp_build_const_int32(gallivm,
1012                                           reg->Register.Index * 4 + (swizzle_in >> 16));
1013             imms_ptr2 = LLVMBuildGEP(builder,
1014                                      bld->imms_array, gep, 2, "");
1015             res2 = LLVMBuildLoad(builder, imms_ptr2, "");
1016             res = emit_fetch_64bit(bld_base, stype, res, res2);
1017          }
1018       }
1019    }
1020    else {
1021       res = bld->immediates[reg->Register.Index][swizzle];
1022       if (tgsi_type_is_64bit(stype))
1023          res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
1024    }
1025 
1026    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1027       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1028       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1029    }
1030    return res;
1031 }
1032 
1033 static LLVMValueRef
1034 emit_fetch_input(
1035    struct lp_build_tgsi_context * bld_base,
1036    const struct tgsi_full_src_register * reg,
1037    enum tgsi_opcode_type stype,
1038    unsigned swizzle_in)
1039 {
1040    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1041    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1042    LLVMBuilderRef builder = gallivm->builder;
1043    LLVMValueRef res;
1044    unsigned swizzle = swizzle_in & 0xffff;
1045 
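   /*
    * Indirect addressing gathers per-channel offsets from the flat inputs
    * array; otherwise we read either a fixed slot of that array (when inputs
    * live in an array) or the pre-loaded per-channel value.
    */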
1046    if (reg->Register.Indirect) {
1047       LLVMValueRef indirect_index;
1048       LLVMValueRef index_vec;  /* index into the input reg array */
1049       LLVMValueRef index_vec2 = NULL;
1050       LLVMValueRef inputs_array;
1051       LLVMTypeRef fptr_type;
1052 
1053       indirect_index = get_indirect_index(bld,
1054                                           reg->Register.File,
1055                                           reg->Register.Index,
1056                                           &reg->Indirect,
1057                                           bld->bld_base.info->file_max[reg->Register.File]);
1058 
1059       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1060                                         indirect_index,
1061                                         swizzle,
1062                                         TRUE);
1063       if (tgsi_type_is_64bit(stype)) {
1064          index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1065                                            indirect_index,
1066                                            swizzle_in >> 16,
1067                                            TRUE);
1068       }
1069       /* cast inputs_array pointer to float* */
1070       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1071       inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1072 
1073       /* Gather values from the input register array */
1074       res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1075    } else {
1076       if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1077          LLVMValueRef lindex = lp_build_const_int32(gallivm,
1078                                         reg->Register.Index * 4 + swizzle);
1079          LLVMValueRef input_ptr = LLVMBuildGEP(builder,
1080                                                bld->inputs_array, &lindex, 1, "");
1081 
1082          res = LLVMBuildLoad(builder, input_ptr, "");
1083          if (tgsi_type_is_64bit(stype)) {
1084             LLVMValueRef lindex1;
1085             LLVMValueRef input_ptr2;
1086             LLVMValueRef res2;
1087 
1088             lindex1 = lp_build_const_int32(gallivm,
1089                                            reg->Register.Index * 4 + (swizzle_in >> 16));
1090             input_ptr2 = LLVMBuildGEP(builder,
1091                                       bld->inputs_array, &lindex1, 1, "");
1092             res2 = LLVMBuildLoad(builder, input_ptr2, "");
1093             res = emit_fetch_64bit(bld_base, stype, res, res2);
1094          }
1095       }
1096       else {
1097          res = bld->inputs[reg->Register.Index][swizzle];
1098          if (tgsi_type_is_64bit(stype))
1099             res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
1100       }
1101    }
1102 
1103    assert(res);
1104 
1105    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1106       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1107       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1108    }
1109 
1110    return res;
1111 }
1112 
1113 
1114 static LLVMValueRef
1115 emit_fetch_gs_input(
1116    struct lp_build_tgsi_context * bld_base,
1117    const struct tgsi_full_src_register * reg,
1118    enum tgsi_opcode_type stype,
1119    unsigned swizzle_in)
1120 {
1121    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1122    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1123    const struct tgsi_shader_info *info = bld->bld_base.info;
1124    LLVMBuilderRef builder = gallivm->builder;
1125    LLVMValueRef attrib_index = NULL;
1126    LLVMValueRef vertex_index = NULL;
1127    unsigned swizzle = swizzle_in & 0xffff;
1128    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1129    LLVMValueRef res;
1130 
1131    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1132       /* This is really a system value not a regular input */
1133       assert(!reg->Register.Indirect);
1134       assert(!reg->Dimension.Indirect);
1135       res = bld->system_values.prim_id;
1136       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1137          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1138       }
1139       return res;
1140    }
1141 
1142    if (reg->Register.Indirect) {
1143       /*
1144        * XXX: this is possibly not quite the right value, since file_max may be
1145        * larger than the max attrib index, due to it being the max of declared
1146        * inputs AND the max vertices per prim (which is 6 for tri adj).
1147        * It should however be safe to use (since we always allocate
1148        * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1149        */
1150       int index_limit = info->file_max[reg->Register.File];
1151       attrib_index = get_indirect_index(bld,
1152                                         reg->Register.File,
1153                                         reg->Register.Index,
1154                                         &reg->Indirect,
1155                                         index_limit);
1156    } else {
1157       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1158    }
1159 
1160    if (reg->Dimension.Indirect) {
1161       /*
1162        * A fixed 6 should do as well (which is what we allocate).
1163        */
1164       int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1165       vertex_index = get_indirect_index(bld,
1166                                         reg->Register.File,
1167                                         reg->Dimension.Index,
1168                                         &reg->DimIndirect,
1169                                         index_limit);
1170    } else {
1171       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1172    }
1173 
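   /*
    * The actual fetch is delegated to the geometry shader interface, which
    * knows how the per-vertex input data is laid out.
    */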
1174    res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1175                                     reg->Dimension.Indirect,
1176                                     vertex_index,
1177                                     reg->Register.Indirect,
1178                                     attrib_index,
1179                                     swizzle_index);
1180 
1181    assert(res);
1182    if (tgsi_type_is_64bit(stype)) {
1183       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1184       LLVMValueRef res2;
1185       res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1186                                         reg->Dimension.Indirect,
1187                                         vertex_index,
1188                                         reg->Register.Indirect,
1189                                         attrib_index,
1190                                         swizzle_index);
1191       assert(res2);
1192       res = emit_fetch_64bit(bld_base, stype, res, res2);
1193    } else if (stype == TGSI_TYPE_UNSIGNED) {
1194       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1195    } else if (stype == TGSI_TYPE_SIGNED) {
1196       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1197    }
1198 
1199    return res;
1200 }
1201 
1202 static LLVMValueRef
1203 emit_fetch_tcs_input(
1204    struct lp_build_tgsi_context * bld_base,
1205    const struct tgsi_full_src_register * reg,
1206    enum tgsi_opcode_type stype,
1207    unsigned swizzle_in)
1208 {
1209    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1210    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1211    const struct tgsi_shader_info *info = bld->bld_base.info;
1212    LLVMBuilderRef builder = gallivm->builder;
1213    LLVMValueRef attrib_index = NULL;
1214    LLVMValueRef vertex_index = NULL;
1215    unsigned swizzle = swizzle_in & 0xffff;
1216    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1217    LLVMValueRef res;
1218 
1219    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1220       /* This is really a system value not a regular input */
1221       assert(!reg->Register.Indirect);
1222       assert(!reg->Dimension.Indirect);
1223       res = bld->system_values.prim_id;
1224       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1225          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1226       }
1227       return res;
1228    }
1229 
1230    if (reg->Register.Indirect) {
1231       int index_limit = info->file_max[reg->Register.File];
1232       attrib_index = get_indirect_index(bld,
1233                                         reg->Register.File,
1234                                         reg->Register.Index,
1235                                         &reg->Indirect,
1236                                         index_limit);
1237    } else {
1238       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1239    }
1240 
1241    if (reg->Dimension.Indirect) {
1242       vertex_index = get_indirect_index(bld,
1243                                         reg->Register.File,
1244                                         reg->Dimension.Index,
1245                                         &reg->DimIndirect,
1246                                         PIPE_MAX_SHADER_INPUTS);
1247    } else {
1248       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1249    }
1250 
1251    // TCS can read from its own outputs
1252    if (reg->Register.File == TGSI_FILE_OUTPUT) {
1253       res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1254                                               reg->Dimension.Indirect,
1255                                               vertex_index,
1256                                               reg->Register.Indirect,
1257                                               attrib_index,
1258                                               FALSE,
1259                                               swizzle_index,
1260                                               bld_base->info->output_semantic_name[reg->Register.Index]);
1261    } else {
1262       res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1263                                              reg->Dimension.Indirect,
1264                                              vertex_index,
1265                                              reg->Register.Indirect,
1266                                              attrib_index,
1267                                              FALSE,
1268                                              swizzle_index);
1269    }
1270 
1271 
1272    assert(res);
1273    if (tgsi_type_is_64bit(stype)) {
1274       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1275       LLVMValueRef res2;
1276       if (reg->Register.File == TGSI_FILE_OUTPUT) {
1277          res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1278                                                   reg->Dimension.Indirect,
1279                                                   vertex_index,
1280                                                   reg->Register.Indirect,
1281                                                   attrib_index,
1282                                                   FALSE,
1283                                                   swizzle_index,
1284                                                   bld_base->info->output_semantic_name[reg->Register.Index]);
1285       } else {
1286          res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1287                                                  reg->Dimension.Indirect,
1288                                                  vertex_index,
1289                                                  reg->Register.Indirect,
1290                                                  attrib_index,
1291                                                  FALSE,
1292                                                  swizzle_index);
1293       }
1294       assert(res2);
1295       res = emit_fetch_64bit(bld_base, stype, res, res2);
1296    } else if (stype == TGSI_TYPE_UNSIGNED) {
1297       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1298    } else if (stype == TGSI_TYPE_SIGNED) {
1299       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1300    }
1301 
1302    return res;
1303 }
1304 
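/**
 * Fetch a tessellation evaluation shader input register.
 *
 * PRIMID is really a system value and is returned directly; patch inputs
 * and per-vertex inputs go through the tes_iface fetch callbacks.
 */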
1305 static LLVMValueRef
1306 emit_fetch_tes_input(
1307    struct lp_build_tgsi_context * bld_base,
1308    const struct tgsi_full_src_register * reg,
1309    enum tgsi_opcode_type stype,
1310    unsigned swizzle_in)
1311 {
1312    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1313    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1314    const struct tgsi_shader_info *info = bld->bld_base.info;
1315    LLVMBuilderRef builder = gallivm->builder;
1316    LLVMValueRef attrib_index = NULL;
1317    LLVMValueRef vertex_index = NULL;
1318    unsigned swizzle = swizzle_in & 0xffff;
1319    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1320    LLVMValueRef res;
1321 
1322    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1323       /* This is really a system value, not a regular input. */
1324       assert(!reg->Register.Indirect);
1325       assert(!reg->Dimension.Indirect);
1326       res = bld->system_values.prim_id;
1327       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1328          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1329       }
1330       return res;
1331    }
1332 
1333    if (reg->Register.Indirect) {
1334       int index_limit = info->file_max[reg->Register.File];
1335       attrib_index = get_indirect_index(bld,
1336                                         reg->Register.File,
1337                                         reg->Register.Index,
1338                                         &reg->Indirect,
1339                                         index_limit);
1340    } else {
1341       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1342    }
1343 
1344    if (reg->Dimension.Indirect) {
1345       vertex_index = get_indirect_index(bld,
1346                                         reg->Register.File,
1347                                         reg->Dimension.Index,
1348                                         &reg->DimIndirect,
1349                                         PIPE_MAX_SHADER_INPUTS);
1350    } else {
1351       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1352    }
1353 
1354    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1355       res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1356                                      reg->Register.Indirect,
1357                                      attrib_index,
1358                                      swizzle_index);
1359    } else {
1360       res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1361                                        reg->Dimension.Indirect,
1362                                        vertex_index,
1363                                        reg->Register.Indirect,
1364                                        attrib_index,
1365                                        FALSE,
1366                                        swizzle_index);
1367    }
1368 
1369    assert(res);
1370    if (tgsi_type_is_64bit(stype)) {
1371       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1372       LLVMValueRef res2;
1373       if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1374          res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1375                                     reg->Register.Indirect,
1376                                     attrib_index,
1377                                     swizzle_index);
1378       }
1379       else {
1380          res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1381                                              reg->Dimension.Indirect,
1382                                              vertex_index,
1383                                              reg->Register.Indirect,
1384                                              attrib_index,
1385                                              FALSE,
1386                                              swizzle_index);
1387       }
1388       assert(res2);
1389       res = emit_fetch_64bit(bld_base, stype, res, res2);
1390    } else if (stype == TGSI_TYPE_UNSIGNED) {
1391       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1392    } else if (stype == TGSI_TYPE_SIGNED) {
1393       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1394    }
1395 
1396    return res;
1397 }
1398 
1399 
1400 
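/**
 * Fetch a value from a temporary register.
 *
 * With indirect addressing the values are gathered from the flat temps
 * array; otherwise they are loaded from the per-channel temp pointer.
 * 64-bit types are assembled from two 32-bit channels.
 */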
1401 static LLVMValueRef
1402 emit_fetch_temporary(
1403    struct lp_build_tgsi_context * bld_base,
1404    const struct tgsi_full_src_register * reg,
1405    enum tgsi_opcode_type stype,
1406    unsigned swizzle_in)
1407 {
1408    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1409    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1410    LLVMBuilderRef builder = gallivm->builder;
1411    LLVMValueRef res;
1412    unsigned swizzle = swizzle_in & 0xffff;
1413 
1414    if (reg->Register.Indirect) {
1415       LLVMValueRef indirect_index;
1416       LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
1417       LLVMValueRef temps_array;
1418       LLVMTypeRef fptr_type;
1419 
1420       indirect_index = get_indirect_index(bld,
1421                                           reg->Register.File,
1422                                           reg->Register.Index,
1423                                           &reg->Indirect,
1424                                           bld->bld_base.info->file_max[reg->Register.File]);
1425 
1426       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1427                                         indirect_index,
1428                                         swizzle,
1429                                         TRUE);
1430       if (tgsi_type_is_64bit(stype)) {
1431                index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1432                                                   indirect_index,
1433                                                   swizzle_in >> 16,
1434                                                   TRUE);
1435       }
1436 
1437       /* cast temps_array pointer to float* */
1438       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1439       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1440 
1441       /* Gather values from the temporary register array */
1442       res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1443    }
1444    else {
1445       LLVMValueRef temp_ptr;
1446       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1447       res = LLVMBuildLoad(builder, temp_ptr, "");
1448 
1449       if (tgsi_type_is_64bit(stype)) {
1450          LLVMValueRef temp_ptr2, res2;
1451 
1452          temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1453          res2 = LLVMBuildLoad(builder, temp_ptr2, "");
1454          res = emit_fetch_64bit(bld_base, stype, res, res2);
1455       }
1456    }
1457 
1458    if (stype == TGSI_TYPE_SIGNED ||
1459        stype == TGSI_TYPE_UNSIGNED ||
1460        stype == TGSI_TYPE_DOUBLE ||
1461        stype == TGSI_TYPE_SIGNED64 ||
1462        stype == TGSI_TYPE_UNSIGNED64) {
1463       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1464       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1465    }
1466 
1467    return res;
1468 }
1469 
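/**
 * Fetch a system value register.
 *
 * The value is selected by its semantic name and bitcast to the type
 * expected by the instruction if that differs from the actual type.
 */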
1470 static LLVMValueRef
1471 emit_fetch_system_value(
1472    struct lp_build_tgsi_context * bld_base,
1473    const struct tgsi_full_src_register * reg,
1474    enum tgsi_opcode_type stype,
1475    unsigned swizzle_in)
1476 {
1477    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1478    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1479    const struct tgsi_shader_info *info = bld->bld_base.info;
1480    LLVMBuilderRef builder = gallivm->builder;
1481    LLVMValueRef res;
1482    enum tgsi_opcode_type atype; // Actual type of the value
1483    unsigned swizzle = swizzle_in & 0xffff;
1484 
1485    assert(!reg->Register.Indirect);
1486 
1487    switch (info->system_value_semantic_name[reg->Register.Index]) {
1488    case TGSI_SEMANTIC_INSTANCEID:
1489       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1490       atype = TGSI_TYPE_UNSIGNED;
1491       break;
1492 
1493    case TGSI_SEMANTIC_VERTEXID:
1494       res = bld->system_values.vertex_id;
1495       atype = TGSI_TYPE_UNSIGNED;
1496       break;
1497 
1498    case TGSI_SEMANTIC_VERTEXID_NOBASE:
1499       res = bld->system_values.vertex_id_nobase;
1500       atype = TGSI_TYPE_UNSIGNED;
1501       break;
1502 
1503    case TGSI_SEMANTIC_BASEVERTEX:
1504       res = bld->system_values.basevertex;
1505       atype = TGSI_TYPE_UNSIGNED;
1506       break;
1507 
1508    case TGSI_SEMANTIC_BASEINSTANCE:
1509       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1510       atype = TGSI_TYPE_UNSIGNED;
1511       break;
1512 
1513    case TGSI_SEMANTIC_PRIMID:
1514       res = bld->system_values.prim_id;
1515       atype = TGSI_TYPE_UNSIGNED;
1516       break;
1517 
1518    case TGSI_SEMANTIC_INVOCATIONID:
1519       if (info->processor == PIPE_SHADER_TESS_CTRL)
1520          res = bld->system_values.invocation_id;
1521       else
1522          res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1523       atype = TGSI_TYPE_UNSIGNED;
1524       break;
1525 
1526    case TGSI_SEMANTIC_HELPER_INVOCATION:
1527       res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
1528       atype = TGSI_TYPE_UNSIGNED;
1529       break;
1530 
1531    case TGSI_SEMANTIC_THREAD_ID:
1532       res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
1533       atype = TGSI_TYPE_UNSIGNED;
1534       break;
1535 
1536    case TGSI_SEMANTIC_BLOCK_ID:
1537       res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
1538       atype = TGSI_TYPE_UNSIGNED;
1539       break;
1540 
1541    case TGSI_SEMANTIC_GRID_SIZE:
1542       res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
1543       atype = TGSI_TYPE_UNSIGNED;
1544       break;
1545 
1546    case TGSI_SEMANTIC_TESSCOORD:
1547       {
1548          LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
1549          LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
1550          res = LLVMBuildLoad(builder, array_indexed, "tess_coord");
1551       }
1552       atype = TGSI_TYPE_FLOAT;
1553       break;
1554 
1555    case TGSI_SEMANTIC_FACE:
1556       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1557       atype = TGSI_TYPE_UNSIGNED;
1558       break;
1559 
1560    case TGSI_SEMANTIC_DRAWID:
1561       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1562       atype = TGSI_TYPE_UNSIGNED;
1563       break;
1564 
1565    case TGSI_SEMANTIC_SAMPLEID:
1566       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1567       atype = TGSI_TYPE_UNSIGNED;
1568       break;
1569 
1570    case TGSI_SEMANTIC_TESSOUTER:
1571       res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1572                                        bld->system_values.tess_outer,
1573                                        lp_build_const_int32(gallivm, swizzle_in));
1574       atype = TGSI_TYPE_FLOAT;
1575       break;
1576 
1577    case TGSI_SEMANTIC_TESSINNER:
1578       res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1579                                        bld->system_values.tess_inner,
1580                                        lp_build_const_int32(gallivm, swizzle_in));
1581       atype = TGSI_TYPE_FLOAT;
1582       break;
1583 
1584    case TGSI_SEMANTIC_VERTICESIN:
1585       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
1586       atype = TGSI_TYPE_UNSIGNED;
1587       break;
1588 
1589    default:
1590       assert(!"unexpected semantic in emit_fetch_system_value");
1591       res = bld_base->base.zero;
1592       atype = TGSI_TYPE_FLOAT;
1593       break;
1594    }
1595 
1596    if (atype != stype) {
1597       if (stype == TGSI_TYPE_FLOAT) {
1598          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1599       } else if (stype == TGSI_TYPE_UNSIGNED) {
1600          res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1601       } else if (stype == TGSI_TYPE_SIGNED) {
1602          res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1603       }
1604    }
1605 
1606    return res;
1607 }
1608 
1609 /**
1610  * Register fetch with derivatives.
1611  */
1612 static void
1613 emit_fetch_deriv(
1614    struct lp_build_tgsi_soa_context *bld,
1615    LLVMValueRef src,
1616    LLVMValueRef *res,
1617    LLVMValueRef *ddx,
1618    LLVMValueRef *ddy)
1619 {
1620    if (res)
1621       *res = src;
1622 
1623    /* TODO: use interpolation coeffs for inputs */
1624 
1625    if (ddx)
1626       *ddx = lp_build_ddx(&bld->bld_base.base, src);
1627 
1628    if (ddy)
1629       *ddy = lp_build_ddy(&bld->bld_base.base, src);
1630 }
1631 
1632 /**
1633  * Store an array of vec-length 64-bit values into two arrays of vec_length
1634  * floats, i.e.
1635  * value is d0, d1, d2, d3 etc.
1636  * Each 64-bit value has two 32-bit pieces x, y,
1637  * which get stored into the separate channels as:
1638  * chan_ptr = d0.x, d1.x, d2.x, d3.x
1639  * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1640  */
1641 static void
1642 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1643                       LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1644                       LLVMValueRef value)
1645 {
1646    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1647    struct gallivm_state *gallivm = bld_base->base.gallivm;
1648    LLVMBuilderRef builder = gallivm->builder;
1649    struct lp_build_context *float_bld = &bld_base->base;
1650    unsigned i;
1651    LLVMValueRef temp, temp2;
1652    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1653    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1654 
1655    for (i = 0; i < bld_base->base.type.length; i++) {
1656       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1657       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1658    }
1659 
1660    temp = LLVMBuildShuffleVector(builder, value,
1661                                  LLVMGetUndef(LLVMTypeOf(value)),
1662                                  LLVMConstVector(shuffles,
1663                                                  bld_base->base.type.length),
1664                                  "");
1665    temp2 = LLVMBuildShuffleVector(builder, value,
1666                                   LLVMGetUndef(LLVMTypeOf(value)),
1667                                   LLVMConstVector(shuffles2,
1668                                                   bld_base->base.type.length),
1669                                   "");
1670 
1671    lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1672    lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1673 }
1674 
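/**
 * Store a value into an output register.
 *
 * Indirect stores are scattered into the flat outputs array under the
 * execution mask; 64-bit values are split across two channel pointers.
 */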
1675 static void
1676 emit_store_output(struct lp_build_tgsi_context *bld_base,
1677                   enum tgsi_opcode_type dtype,
1678                   const struct tgsi_full_dst_register *reg,
1679                   unsigned index,
1680                   unsigned chan_index,
1681                   LLVMValueRef indirect_index,
1682                   LLVMValueRef value)
1683 {
1684    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1685    struct gallivm_state *gallivm = bld_base->base.gallivm;
1686    LLVMBuilderRef builder = gallivm->builder;
1687    struct lp_build_context *float_bld = &bld_base->base;
1688 
1689    /* Outputs are always stored as floats */
1690    value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1691 
1692    if (reg->Register.Indirect) {
1693       LLVMValueRef index_vec;  /* indexes into the output registers */
1694       LLVMValueRef outputs_array;
1695       LLVMTypeRef fptr_type;
1696 
1697       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1698                                           indirect_index,
1699                                           chan_index,
1700                                           TRUE);
1701 
1702       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1703       outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1704 
1705       /* Scatter store values into output registers */
1706       emit_mask_scatter(bld, outputs_array, index_vec, value,
1707                         &bld->exec_mask);
1708    }
1709    else {
1710       assert(LLVMTypeOf(value) == float_bld->vec_type);
1711       LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1712                                                 chan_index);
1713 
1714       if (tgsi_type_is_64bit(dtype)) {
1715          LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1716                                                    chan_index + 1);
1717          emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1718                                  value);
1719       } else
1720          lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1721    }
1722 }
1723 
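/**
 * Store a value into a tessellation control shader output, going through
 * the tcs_iface->emit_store_output callback with the current exec mask.
 */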
1724 static void
1725 emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
1726                       enum tgsi_opcode_type dtype,
1727                       const struct tgsi_full_dst_register *reg,
1728                       unsigned index,
1729                       unsigned chan_index,
1730                       LLVMValueRef indirect_index,
1731                       LLVMValueRef value)
1732 {
1733    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1734    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1735    const struct tgsi_shader_info *info = bld->bld_base.info;
1736    LLVMValueRef attrib_index = NULL;
1737    LLVMValueRef vertex_index = NULL;
1738    LLVMValueRef channel_index = NULL;
1739 
1740    if (reg->Register.Indirect) {
1741       /*
1742        * XXX: this is possibly not quite the right value, since file_max may be
1743        * larger than the max attrib index, due to it being the max of declared
1744        * inputs AND the max vertices per prim (which is 6 for tri adj).
1745        * It should however be safe to use (since we always allocate
1746        * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1747        */
1748       int index_limit = info->file_max[reg->Register.File];
1749       attrib_index = get_indirect_index(bld,
1750                                         reg->Register.File,
1751                                         reg->Register.Index,
1752                                         &reg->Indirect,
1753                                         index_limit);
1754    } else {
1755       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1756    }
1757 
1758    if (reg->Dimension.Indirect) {
1759       vertex_index = get_indirect_index(bld,
1760                                         reg->Register.File,
1761                                         reg->Dimension.Index,
1762                                         &reg->DimIndirect,
1763                                         PIPE_MAX_SHADER_OUTPUTS);
1764    } else {
1765       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1766    }
1767 
1768    channel_index = lp_build_const_int32(gallivm, chan_index);
1769 
1770    assert(bld->tcs_iface->emit_store_output);
1771    bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1772                                           bld_base->info->output_semantic_name[reg->Register.Index],
1773                                           reg->Dimension.Indirect,
1774                                           vertex_index,
1775                                           reg->Register.Indirect,
1776                                           attrib_index,
1777                                           false,
1778                                           channel_index,
1779                                           value,
1780                                           mask_vec(bld_base));
1781 }
1782 
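/**
 * Store a value into a temporary register, either with a masked scatter
 * for indirect addressing or a masked store to the per-channel pointer.
 */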
1783 static void
1784 emit_store_temp(struct lp_build_tgsi_context *bld_base,
1785                   enum tgsi_opcode_type dtype,
1786                   const struct tgsi_full_dst_register *reg,
1787                   unsigned index,
1788                   unsigned chan_index,
1789                   LLVMValueRef indirect_index,
1790                   LLVMValueRef value)
1791 {
1792    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1793    struct gallivm_state *gallivm = bld_base->base.gallivm;
1794    LLVMBuilderRef builder = gallivm->builder;
1795    struct lp_build_context *float_bld = &bld_base->base;
1796 
1797    /* Temporaries are always stored as floats */
1798    if (!tgsi_type_is_64bit(dtype))
1799       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1800    else
1801       value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1802 
1803    if (reg->Register.Indirect) {
1804       LLVMValueRef index_vec;  /* indexes into the temp registers */
1805       LLVMValueRef temps_array;
1806       LLVMTypeRef fptr_type;
1807 
1808       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1809                                           indirect_index,
1810                                           chan_index,
1811                                           TRUE);
1812 
1813       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1814       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1815 
1816       /* Scatter store values into temp registers */
1817       emit_mask_scatter(bld, temps_array, index_vec, value,
1818                         &bld->exec_mask);
1819    }
1820    else {
1821       LLVMValueRef temp_ptr;
1822       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1823 
1824       if (tgsi_type_is_64bit(dtype)) {
1825          LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1826                                                       reg->Register.Index,
1827                                                       chan_index + 1);
1828          emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1829                                  value);
1830       }
1831       else
1832          lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1833    }
1834 }
1835 
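/**
 * Store a value into an address register (bld->addr); only signed
 * integer values are expected here.
 */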
1836 static void
1837 emit_store_address(struct lp_build_tgsi_context *bld_base,
1838                    enum tgsi_opcode_type dtype,
1839                    const struct tgsi_full_dst_register *reg,
1840                    unsigned index,
1841                    unsigned chan_index,
1842                    LLVMValueRef indirect_index,
1843                    LLVMValueRef value)
1844 {
1845    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1846    struct gallivm_state *gallivm = bld_base->base.gallivm;
1847    LLVMBuilderRef builder = gallivm->builder;
1848    struct lp_build_context *int_bld = &bld_base->int_bld;
1849 
1850    assert(dtype == TGSI_TYPE_SIGNED);
1851    assert(LLVMTypeOf(value) == int_bld->vec_type);
1852    value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1853    lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1854                         bld->addr[reg->Register.Index][chan_index]);
1855 }
1856 
1857 /**
1858  * Register store.
1859  */
1860 static void
1861 emit_store_chan(
1862    struct lp_build_tgsi_context *bld_base,
1863    const struct tgsi_full_instruction *inst,
1864    unsigned index,
1865    unsigned chan_index,
1866    LLVMValueRef value)
1867 {
1868    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1869    struct gallivm_state *gallivm = bld_base->base.gallivm;
1870    LLVMBuilderRef builder = gallivm->builder;
1871    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1872    struct lp_build_context *float_bld = &bld_base->base;
1873    LLVMValueRef indirect_index = NULL;
1874    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1875 
1876    /*
1877     * Apply saturation.
1878     *
1879     * The value is always assumed to be float.
1880     */
1881    if (inst->Instruction.Saturate) {
1882       assert(dtype == TGSI_TYPE_FLOAT ||
1883              dtype == TGSI_TYPE_UNTYPED);
1884       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1885       value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1886    }
1887 
1888    if (reg->Register.Indirect) {
1889       /*
1890        * Currently mesa/st doesn't generate indirect stores
1891        * to 64-bit values; it normally uses MOV to do indirect stores.
1892        */
1893       assert(!tgsi_type_is_64bit(dtype));
1894       indirect_index = get_indirect_index(bld,
1895                                           reg->Register.File,
1896                                           reg->Register.Index,
1897                                           &reg->Indirect,
1898                                           bld->bld_base.info->file_max[reg->Register.File]);
1899    } else {
1900       assert(reg->Register.Index <=
1901                              bld_base->info->file_max[reg->Register.File]);
1902    }
1903 
1904    if (DEBUG_EXECUTION) {
1905       emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1906    }
1907 
1908    assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
1909    bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
1910                                                       dtype,
1911                                                       reg,
1912                                                       index,
1913                                                       chan_index,
1914                                                       indirect_index,
1915                                                       value);
1916 
1917    (void)dtype;
1918 }
1919 
1920 /*
1921  * Called at the beginning of the translation of each TGSI instruction, to
1922  * emit some debug code.
1923  */
1924 static void
1925 emit_debug(
1926    struct lp_build_tgsi_context * bld_base,
1927    const struct tgsi_full_instruction * inst,
1928    const struct tgsi_opcode_info * info)
1929 
1930 {
1931    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1932 
1933    if (DEBUG_EXECUTION) {
1934       /*
1935        * Dump the TGSI instruction.
1936        */
1937 
1938       struct gallivm_state *gallivm = bld_base->base.gallivm;
1939       char buf[512];
1940       buf[0] = '$';
1941       buf[1] = ' ';
1942       tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1943       lp_build_printf(gallivm, buf);
1944 
1945       /* Dump the execution mask.
1946        */
1947       if (bld->exec_mask.has_mask) {
1948          lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
1949       }
1950    }
1951 }
1952 
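/**
 * Store an instruction result to its destination register, one enabled
 * writemask channel at a time (64-bit types only use channels 0 and 2).
 */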
1953 static void
1954 emit_store(
1955    struct lp_build_tgsi_context * bld_base,
1956    const struct tgsi_full_instruction * inst,
1957    const struct tgsi_opcode_info * info,
1958    unsigned index,
1959    LLVMValueRef dst[4])
1960 
1961 {
1962    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1963 
1964    unsigned writemask = inst->Dst[index].Register.WriteMask;
1965    while (writemask) {
1966       unsigned chan_index = u_bit_scan(&writemask);
1967       if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1968           continue;
1969       emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1970    }
1971 }
1972 
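/**
 * Translate a TGSI texture target into the corresponding
 * pipe_texture_target (or PIPE_BUFFER).
 */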
1973 static unsigned
1974 tgsi_to_pipe_tex_target(unsigned tgsi_target)
1975 {
1976    switch (tgsi_target) {
1977    case TGSI_TEXTURE_BUFFER:
1978       return PIPE_BUFFER;
1979    case TGSI_TEXTURE_1D:
1980    case TGSI_TEXTURE_SHADOW1D:
1981       return PIPE_TEXTURE_1D;
1982    case TGSI_TEXTURE_2D:
1983    case TGSI_TEXTURE_SHADOW2D:
1984    case TGSI_TEXTURE_2D_MSAA:
1985       return PIPE_TEXTURE_2D;
1986    case TGSI_TEXTURE_3D:
1987       return PIPE_TEXTURE_3D;
1988    case TGSI_TEXTURE_CUBE:
1989    case TGSI_TEXTURE_SHADOWCUBE:
1990       return PIPE_TEXTURE_CUBE;
1991    case TGSI_TEXTURE_RECT:
1992    case TGSI_TEXTURE_SHADOWRECT:
1993       return PIPE_TEXTURE_RECT;
1994    case TGSI_TEXTURE_1D_ARRAY:
1995    case TGSI_TEXTURE_SHADOW1D_ARRAY:
1996       return PIPE_TEXTURE_1D_ARRAY;
1997    case TGSI_TEXTURE_2D_ARRAY:
1998    case TGSI_TEXTURE_SHADOW2D_ARRAY:
1999    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2000       return PIPE_TEXTURE_2D_ARRAY;
2001    case TGSI_TEXTURE_CUBE_ARRAY:
2002    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2003       return PIPE_TEXTURE_CUBE_ARRAY;
2004    default:
2005       assert(0);
2006       return PIPE_BUFFER;
2007    }
2008 }
2009 
2010 
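/**
 * Decide whether the lod of a texture instruction can be treated as
 * scalar, per-quad or per-element, based on where the lod operand comes
 * from and on the shader stage.
 */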
2011 static enum lp_sampler_lod_property
2012 lp_build_lod_property(
2013    struct lp_build_tgsi_context *bld_base,
2014    const struct tgsi_full_instruction *inst,
2015    unsigned src_op)
2016 {
2017    const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2018    enum lp_sampler_lod_property lod_property;
2019 
2020    /*
2021     * Not much we can do here. We could try catching inputs declared
2022     * with constant interpolation, but it's probably not worth it - for
2023     * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
2024     * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO, just
2025     * like the constant/immediate recognition below.
2026     * Recognizing temps holding broadcasted scalars would be more valuable,
2027     * but there is no way we can do that here.
2028     * Asking llvm (via LLVMIsConstant, even though that isn't exactly what
2029     * we'd need) was tried without any success; even something as simple as
2030     * IMM[0] UINT32 (0,-1,0,0)
2031     * MOV TEMP[0] IMM[0].yyyy
2032     * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2033     * is not recognized.
2034     * This means there's zero chance this will ever catch a scalar lod
2035     * with traditional tex opcodes as well as texel fetches, since the lod
2036     * comes from the same reg as the coords (except maybe some test shaders
2037     * using constant coords).
2038     * There's at least hope for sample opcodes as well as size queries.
2039     */
2040    if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2041        reg->Register.File == TGSI_FILE_CONSTANT ||
2042        reg->Register.File == TGSI_FILE_IMMEDIATE) {
2043       lod_property = LP_SAMPLER_LOD_SCALAR;
2044    }
2045    else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2046       if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2047          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2048       }
2049       else {
2050          lod_property = LP_SAMPLER_LOD_PER_QUAD;
2051       }
2052    }
2053    else {
2054       /* Never use scalar (per-quad) lod; the results are just too wrong. */
2055       lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2056    }
2057    return lod_property;
2058 }
2059 
2060 
2061 /**
2062  * High-level instruction translators.
2063  */
2064 
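/**
 * Translate a traditional TEX-style opcode (TEX, TXB, TXL, TXD, TXP, ...)
 * into a call to the sampler code generator, gathering coords, offsets,
 * derivatives and lod into an lp_sampler_params structure.
 */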
2065 static void
2066 emit_tex( struct lp_build_tgsi_soa_context *bld,
2067           const struct tgsi_full_instruction *inst,
2068           enum lp_build_tex_modifier modifier,
2069           LLVMValueRef *texel,
2070           unsigned sampler_reg,
2071           enum lp_sampler_op_type sampler_op)
2072 {
2073    unsigned unit = inst->Src[sampler_reg].Register.Index;
2074    LLVMValueRef oow = NULL;
2075    LLVMValueRef lod = NULL;
2076    LLVMValueRef coords[5];
2077    LLVMValueRef offsets[3] = { NULL };
2078    struct lp_derivatives derivs;
2079    struct lp_sampler_params params;
2080    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2081    unsigned num_derivs, num_offsets, i;
2082    unsigned shadow_coord = 0;
2083    unsigned layer_coord = 0;
2084    unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2085 
2086    memset(&params, 0, sizeof(params));
2087 
2088    if (!bld->sampler) {
2089       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2090       for (i = 0; i < 4; i++) {
2091          texel[i] = bld->bld_base.base.undef;
2092       }
2093       return;
2094    }
2095 
2096    switch (inst->Texture.Texture) {
2097    case TGSI_TEXTURE_1D_ARRAY:
2098       layer_coord = 1;
2099       FALLTHROUGH;
2100    case TGSI_TEXTURE_1D:
2101       num_offsets = 1;
2102       num_derivs = 1;
2103       break;
2104    case TGSI_TEXTURE_2D_ARRAY:
2105       layer_coord = 2;
2106       FALLTHROUGH;
2107    case TGSI_TEXTURE_2D:
2108    case TGSI_TEXTURE_RECT:
2109       num_offsets = 2;
2110       num_derivs = 2;
2111       break;
2112    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2113       layer_coord = 1;
2114       FALLTHROUGH;
2115    case TGSI_TEXTURE_SHADOW1D:
2116       shadow_coord = 2;
2117       num_offsets = 1;
2118       num_derivs = 1;
2119       break;
2120    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2121       layer_coord = 2;
2122       shadow_coord = 3;
2123       num_offsets = 2;
2124       num_derivs = 2;
2125       break;
2126    case TGSI_TEXTURE_SHADOW2D:
2127    case TGSI_TEXTURE_SHADOWRECT:
2128       shadow_coord = 2;
2129       num_offsets = 2;
2130       num_derivs = 2;
2131       break;
2132    case TGSI_TEXTURE_CUBE:
2133       num_offsets = 2;
2134       num_derivs = 3;
2135       break;
2136    case TGSI_TEXTURE_3D:
2137       num_offsets = 3;
2138       num_derivs = 3;
2139       break;
2140    case TGSI_TEXTURE_SHADOWCUBE:
2141       shadow_coord = 3;
2142       num_offsets = 2;
2143       num_derivs = 3;
2144       break;
2145    case TGSI_TEXTURE_CUBE_ARRAY:
2146       num_offsets = 2;
2147       num_derivs = 3;
2148       layer_coord = 3;
2149       break;
2150    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2151       num_offsets = 2;
2152       num_derivs = 3;
2153       layer_coord = 3;
2154       shadow_coord = 4; /* shadow coord special different reg */
2155       break;
2156    case TGSI_TEXTURE_2D_MSAA:
2157    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2158    default:
2159       assert(0);
2160       return;
2161    }
2162 
2163    /* Note that lod and especially projection are illegal in a lot of cases. */
2164    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2165        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2166       if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
2167          lod = bld->bld_base.base.zero;
2168       } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2169                  inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2170          /* note that shadow cube array with bias/explicit lod does not exist */
2171          lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2172       }
2173       else {
2174          lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2175       }
2176       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2177          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2178       }
2179       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2180          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2181       }
2182       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2183    }
2184 
2185    if (sampler_op == LP_SAMPLER_OP_GATHER) {
2186       uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2187       sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
2188    }
2189    if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2190       oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2191       oow = lp_build_rcp(&bld->bld_base.base, oow);
2192    }
2193 
2194    for (i = 0; i < num_derivs; i++) {
2195       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2196       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2197          coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2198    }
2199    for (i = num_derivs; i < 5; i++) {
2200       coords[i] = bld->bld_base.base.undef;
2201    }
2202 
2203    /* Layer coord always goes into 3rd slot, except for cube map arrays */
2204    if (layer_coord) {
2205       if (layer_coord == 3) {
2206          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2207       }
2208       else {
2209          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2210       }
2211       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2212          coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2213    }
2214    /* The shadow coord always occupies the 5th slot. */
2215    if (shadow_coord) {
2216       sample_key |= LP_SAMPLER_SHADOW;
2217       if (shadow_coord == 4) {
2218          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2219       }
2220       else {
2221          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2222       }
2223       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2224          coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2225    }
2226 
2227    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2228       unsigned dim;
2229       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2230       for (dim = 0; dim < num_derivs; ++dim) {
2231          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2232          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2233       }
2234       params.derivs = &derivs;
2235       /*
2236        * We could also check whether all src regs are constant, but I
2237        * doubt such cases exist in practice.
2238        */
2239       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2240          if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2241             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2242          }
2243          else {
2244             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2245          }
2246       }
2247       else {
2248          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2249       }
2250    }
2251    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2252 
2253    /* We don't handle the 4-offset version of tg4. */
2254    if (inst->Texture.NumOffsets == 1) {
2255       unsigned dim;
2256       sample_key |= LP_SAMPLER_OFFSETS;
2257       for (dim = 0; dim < num_offsets; dim++) {
2258          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2259       }
2260    }
2261 
2262    params.type = bld->bld_base.base.type;
2263    params.sample_key = sample_key;
2264    params.texture_index = unit;
2265    params.sampler_index = unit;
2266    params.context_ptr = bld->context_ptr;
2267    params.thread_data_ptr = bld->thread_data_ptr;
2268    params.coords = coords;
2269    params.offsets = offsets;
2270    params.lod = lod;
2271    params.texel = texel;
2272 
2273    bld->sampler->emit_tex_sample(bld->sampler,
2274                                  bld->bld_base.base.gallivm,
2275                                  &params);
2276 }
2277 
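/**
 * Translate a SAMPLE-style opcode, where the texture and sampler units
 * come from src1 and src2 and the target comes from the declared sampler
 * view rather than from the instruction itself.
 */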
2278 static void
2279 emit_sample(struct lp_build_tgsi_soa_context *bld,
2280             const struct tgsi_full_instruction *inst,
2281             enum lp_build_tex_modifier modifier,
2282             boolean compare,
2283             enum lp_sampler_op_type sample_type,
2284             LLVMValueRef *texel)
2285 {
2286    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2287    unsigned texture_unit, sampler_unit;
2288    LLVMValueRef lod = NULL;
2289    LLVMValueRef coords[5];
2290    LLVMValueRef offsets[3] = { NULL };
2291    struct lp_derivatives derivs;
2292    struct lp_sampler_params params;
2293    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2294 
2295    unsigned num_offsets, num_derivs, i;
2296    unsigned layer_coord = 0;
2297    unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2298 
2299    memset(&params, 0, sizeof(params));
2300 
2301    if (!bld->sampler) {
2302       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2303       for (i = 0; i < 4; i++) {
2304          texel[i] = bld->bld_base.base.undef;
2305       }
2306       return;
2307    }
2308 
2309    /*
2310     * Unlike old-style tex opcodes, the texture/sampler indices
2311     * always come from src1 and src2 respectively.
2312     */
2313    texture_unit = inst->Src[1].Register.Index;
2314    sampler_unit = inst->Src[2].Register.Index;
2315 
2316    /*
2317     * Note that inst->Texture.Texture will contain the number of offsets;
2318     * however, the target information is NOT there and comes from the
2319     * declared sampler views instead.
2320     */
2321    switch (bld->sv[texture_unit].Resource) {
2322    case TGSI_TEXTURE_1D:
2323       num_offsets = 1;
2324       num_derivs = 1;
2325       break;
2326    case TGSI_TEXTURE_1D_ARRAY:
2327       layer_coord = 1;
2328       num_offsets = 1;
2329       num_derivs = 1;
2330       break;
2331    case TGSI_TEXTURE_2D:
2332    case TGSI_TEXTURE_RECT:
2333       num_offsets = 2;
2334       num_derivs = 2;
2335       break;
2336    case TGSI_TEXTURE_2D_ARRAY:
2337       layer_coord = 2;
2338       num_offsets = 2;
2339       num_derivs = 2;
2340       break;
2341    case TGSI_TEXTURE_CUBE:
2342       num_offsets = 2;
2343       num_derivs = 3;
2344       break;
2345    case TGSI_TEXTURE_3D:
2346       num_offsets = 3;
2347       num_derivs = 3;
2348       break;
2349    case TGSI_TEXTURE_CUBE_ARRAY:
2350       layer_coord = 3;
2351       num_offsets = 2;
2352       num_derivs = 3;
2353       break;
2354    default:
2355       assert(0);
2356       return;
2357    }
2358 
2359    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2360        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2361       lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2362       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2363          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2364       }
2365       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2366          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2367       }
2368       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2369    }
2370    else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2371       /* XXX might be better to explicitly pass the level zero information */
2372       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2373       lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2374    }
2375 
2376    for (i = 0; i < num_derivs; i++) {
2377       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2378    }
2379    for (i = num_derivs; i < 5; i++) {
2380       coords[i] = bld->bld_base.base.undef;
2381    }
2382 
2383    /* Layer coord always goes into 3rd slot, except for cube map arrays */
2384    if (layer_coord) {
2385       if (layer_coord == 3)
2386          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2387       else
2388          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2389    }
2390    /* The shadow coord always occupies the 5th slot. */
2391    if (compare) {
2392       sample_key |= LP_SAMPLER_SHADOW;
2393       coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2394    }
2395 
2396    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2397       unsigned dim;
2398       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2399       for (dim = 0; dim < num_derivs; ++dim) {
2400          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2401          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2402       }
2403       params.derivs = &derivs;
2404       /*
2405        * We could also check whether all src regs are constant, but I
2406        * doubt such cases exist in practice.
2407        */
2408       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2409          if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2410             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2411          }
2412          else {
2413             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2414          }
2415       }
2416       else {
2417          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2418       }
2419    }
2420 
2421    /* Some advanced gather instructions (txgo) would require 4 offsets. */
2422    if (inst->Texture.NumOffsets == 1) {
2423       unsigned dim;
2424       sample_key |= LP_SAMPLER_OFFSETS;
2425       for (dim = 0; dim < num_offsets; dim++) {
2426          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2427       }
2428    }
2429    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2430 
2431    params.type = bld->bld_base.base.type;
2432    params.sample_key = sample_key;
2433    params.texture_index = texture_unit;
2434    params.sampler_index = sampler_unit;
2435    params.context_ptr = bld->context_ptr;
2436    params.thread_data_ptr = bld->thread_data_ptr;
2437    params.coords = coords;
2438    params.offsets = offsets;
2439    params.lod = lod;
2440    params.texel = texel;
2441 
2442    bld->sampler->emit_tex_sample(bld->sampler,
2443                                  bld->bld_base.base.gallivm,
2444                                  &params);
2445 
2446    if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2447        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2448        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2449        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2450       unsigned char swizzles[4];
2451       swizzles[0] = inst->Src[1].Register.SwizzleX;
2452       swizzles[1] = inst->Src[1].Register.SwizzleY;
2453       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2454       swizzles[3] = inst->Src[1].Register.SwizzleW;
2455 
2456       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2457    }
2458 }
2459 
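/**
 * Translate a texel fetch (TXF / SAMPLE_I) into a call to the sampler
 * code generator, handling the optional explicit lod and MSAA sample index.
 */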
2460 static void
2461 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2462                    const struct tgsi_full_instruction *inst,
2463                    LLVMValueRef *texel,
2464                    boolean is_samplei)
2465 {
2466    unsigned unit, target;
2467    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2468    LLVMValueRef explicit_lod = NULL;
2469    LLVMValueRef coords[5];
2470    LLVMValueRef offsets[3] = { NULL };
2471    LLVMValueRef ms_index = NULL;
2472    struct lp_sampler_params params;
2473    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2474    unsigned dims, i;
2475    unsigned layer_coord = 0;
2476    unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2477 
2478    memset(&params, 0, sizeof(params));
2479 
2480    if (!bld->sampler) {
2481       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2482       for (i = 0; i < 4; i++) {
2483          texel[i] = coord_undef;
2484       }
2485       return;
2486    }
2487 
2488    unit = inst->Src[1].Register.Index;
2489 
2490    if (is_samplei) {
2491       target = bld->sv[unit].Resource;
2492    }
2493    else {
2494       target = inst->Texture.Texture;
2495    }
2496 
2497    switch (target) {
2498    case TGSI_TEXTURE_1D:
2499    case TGSI_TEXTURE_BUFFER:
2500       dims = 1;
2501       break;
2502    case TGSI_TEXTURE_1D_ARRAY:
2503       layer_coord = 1;
2504       dims = 1;
2505       break;
2506    case TGSI_TEXTURE_2D:
2507    case TGSI_TEXTURE_RECT:
2508    case TGSI_TEXTURE_2D_MSAA:
2509       dims = 2;
2510       break;
2511    case TGSI_TEXTURE_2D_ARRAY:
2512    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2513       layer_coord = 2;
2514       dims = 2;
2515       break;
2516    case TGSI_TEXTURE_3D:
2517       dims = 3;
2518       break;
2519    default:
2520       assert(0);
2521       return;
2522    }
2523 
2524    /* We always have an explicit lod, except for buffers, MSAA targets and TXF_LZ. */
2525    if (target != TGSI_TEXTURE_BUFFER &&
2526        target != TGSI_TEXTURE_2D_MSAA &&
2527        target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2528        inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2529       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2530       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2531       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2532    }
2533 
2534    if (target == TGSI_TEXTURE_2D_MSAA ||
2535        target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
2536       sample_key |= LP_SAMPLER_FETCH_MS;
2537       ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2538    }
2539 
2540    /*
2541     * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2542     * would be the sample index.
2543     */
2544 
2545    for (i = 0; i < dims; i++) {
2546       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2547    }
2548    /* We never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway. */
2549    for (i = dims; i < 5; i++) {
2550       coords[i] = coord_undef;
2551    }
2552    if (layer_coord)
2553       coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2554 
2555    if (inst->Texture.NumOffsets == 1) {
2556       unsigned dim;
2557       sample_key |= LP_SAMPLER_OFFSETS;
2558       for (dim = 0; dim < dims; dim++) {
2559          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2560       }
2561    }
2562    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2563 
2564    params.type = bld->bld_base.base.type;
2565    params.sample_key = sample_key;
2566    params.texture_index = unit;
2567    /*
2568     * The sampler is not actually used; set it to 0 so it won't exceed
2569     * PIPE_MAX_SAMPLERS and trigger assertions with d3d10, where the
2570     * sampler view number can exceed this.
2571     */
2572    params.sampler_index = 0;
2573    params.context_ptr = bld->context_ptr;
2574    params.thread_data_ptr = bld->thread_data_ptr;
2575    params.coords = coords;
2576    params.offsets = offsets;
2577    params.derivs = NULL;
2578    params.lod = explicit_lod;
2579    params.texel = texel;
2580    params.ms_index = ms_index;
2581 
2582    bld->sampler->emit_tex_sample(bld->sampler,
2583                                  bld->bld_base.base.gallivm,
2584                                  &params);
2585 
2586    if (is_samplei &&
2587        (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2588         inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2589         inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2590         inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2591       unsigned char swizzles[4];
2592       swizzles[0] = inst->Src[1].Register.SwizzleX;
2593       swizzles[1] = inst->Src[1].Register.SwizzleY;
2594       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2595       swizzles[3] = inst->Src[1].Register.SwizzleW;
2596 
2597       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2598    }
2599 }
2600 
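/**
 * Translate a texture size query (TXQ / SVIEWINFO) into a size query
 * call on the sampler code generator.
 */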
2601 static void
2602 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2603                  const struct tgsi_full_instruction *inst,
2604                  LLVMValueRef *sizes_out,
2605                  boolean is_sviewinfo)
2606 {
2607    LLVMValueRef explicit_lod;
2608    enum lp_sampler_lod_property lod_property;
2609    unsigned has_lod;
2610    unsigned i;
2611    unsigned unit = inst->Src[1].Register.Index;
2612    unsigned target, pipe_target;
2613    struct lp_sampler_size_query_params params;
2614 
2615    if (is_sviewinfo) {
2616       target = bld->sv[unit].Resource;
2617    }
2618    else {
2619       target = inst->Texture.Texture;
2620    }
2621    switch (target) {
2622    case TGSI_TEXTURE_BUFFER:
2623    case TGSI_TEXTURE_RECT:
2624    case TGSI_TEXTURE_SHADOWRECT:
2625       has_lod = 0;
2626       break;
2627    default:
2628       has_lod = 1;
2629       break;
2630    }
2631 
2632    if (!bld->sampler) {
2633       _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2634       for (i = 0; i < 4; i++)
2635          sizes_out[i] = bld->bld_base.int_bld.undef;
2636       return;
2637    }
2638 
2639    if (has_lod) {
2640       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2641       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2642    }
2643    else {
2644       explicit_lod = NULL;
2645       lod_property = LP_SAMPLER_LOD_SCALAR;
2646    }
2647 
2648 
2649    pipe_target = tgsi_to_pipe_tex_target(target);
2650 
2651    params.int_type = bld->bld_base.int_bld.type;
2652    params.texture_unit = unit;
2653    params.target = pipe_target;
2654    params.context_ptr = bld->context_ptr;
2655    params.is_sviewinfo = TRUE;
2656    params.lod_property = lod_property;
2657    params.explicit_lod = explicit_lod;
2658    params.sizes_out = sizes_out;
2659    params.samples_only = false;
2660 
2661    bld->sampler->emit_size_query(bld->sampler,
2662                                  bld->bld_base.base.gallivm,
2663                                  &params);
2664 }
2665 
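/*
 * Heuristic: look at the next few instructions and return TRUE unless one
 * of them is a texturing, sampling, query or control-flow opcode (the
 * shader ending first also counts as TRUE).  The KILL paths below use this
 * to skip lp_build_mask_check() when killing this close to the end is
 * presumably not worth an early exit.
 */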
2666 static boolean
2667 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2668                    int pc)
2669 {
2670    unsigned i;
2671 
2672    for (i = 0; i < 5; i++) {
2673       enum tgsi_opcode opcode;
2674 
2675       if (pc + i >= bld->bld_base.info->num_instructions)
2676          return TRUE;
2677 
2678       opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2679 
2680       if (opcode == TGSI_OPCODE_END)
2681          return TRUE;
2682 
2683       if (opcode == TGSI_OPCODE_TEX ||
2684          opcode == TGSI_OPCODE_TXP ||
2685          opcode == TGSI_OPCODE_TXD ||
2686          opcode == TGSI_OPCODE_TXB ||
2687          opcode == TGSI_OPCODE_TXL ||
2688          opcode == TGSI_OPCODE_TXF ||
2689          opcode == TGSI_OPCODE_TXQ ||
2690          opcode == TGSI_OPCODE_TEX2 ||
2691          opcode == TGSI_OPCODE_TXB2 ||
2692          opcode == TGSI_OPCODE_TXL2 ||
2693          opcode == TGSI_OPCODE_SAMPLE ||
2694          opcode == TGSI_OPCODE_SAMPLE_B ||
2695          opcode == TGSI_OPCODE_SAMPLE_C ||
2696          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2697          opcode == TGSI_OPCODE_SAMPLE_D ||
2698          opcode == TGSI_OPCODE_SAMPLE_I ||
2699          opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2700          opcode == TGSI_OPCODE_SAMPLE_L ||
2701          opcode == TGSI_OPCODE_SVIEWINFO ||
2702          opcode == TGSI_OPCODE_CAL ||
2703          opcode == TGSI_OPCODE_IF ||
2704          opcode == TGSI_OPCODE_UIF ||
2705          opcode == TGSI_OPCODE_BGNLOOP ||
2706          opcode == TGSI_OPCODE_SWITCH)
2707          return FALSE;
2708    }
2709 
2710    return TRUE;
2711 }
2712 
2713 
2714 
2715 /**
2716  * Kill fragment if any of the src register values are negative.
2717  */
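/*
 * E.g. (roughly) "KILL_IF TEMP[0].xxxx" discards the fragment wherever
 * TEMP[0].x < 0, while "KILL_IF TEMP[0]" discards it if any of x, y, z or
 * w is negative; only distinct swizzled components are tested.
 */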
2718 static void
2719 emit_kill_if(
2720    struct lp_build_tgsi_soa_context *bld,
2721    const struct tgsi_full_instruction *inst,
2722    int pc)
2723 {
2724    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2725    const struct tgsi_full_src_register *reg = &inst->Src[0];
2726    LLVMValueRef terms[TGSI_NUM_CHANNELS];
2727    LLVMValueRef mask;
2728    unsigned chan_index;
2729 
2730    memset(&terms, 0, sizeof terms);
2731 
2732    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2733       unsigned swizzle;
2734 
2735       /* Unswizzle channel */
2736       swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2737 
2738       /* Check if the component has not been already tested. */
2739       assert(swizzle < TGSI_NUM_CHANNELS);
2740       if( !terms[swizzle] )
2741          /* TODO: change the comparison operator instead of setting the sign */
2742          terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2743    }
2744 
2745    mask = NULL;
2746    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2747       if(terms[chan_index]) {
2748          LLVMValueRef chan_mask;
2749 
2750          /*
2751           * If term < 0 then mask = 0 else mask = ~0.
2752           */
2753          chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2754 
2755          if(mask)
2756             mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2757          else
2758             mask = chan_mask;
2759       }
2760    }
2761 
2762    if (bld->exec_mask.has_mask) {
2763       LLVMValueRef invmask;
2764       invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2765       mask = LLVMBuildOr(builder, mask, invmask, "");
2766    }
2767 
2768    lp_build_mask_update(bld->mask, mask);
2769    if (!near_end_of_shader(bld, pc))
2770       lp_build_mask_check(bld->mask);
2771 }
2772 
2773 
2774 /**
2775  * Unconditional fragment kill.
2776  * The only predication is the execution mask, which applies when
2777  * we're inside a loop or conditional.
2778  */
2779 static void
2780 emit_kill(struct lp_build_tgsi_soa_context *bld,
2781           int pc)
2782 {
2783    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2784    LLVMValueRef mask;
2785 
2786    /* For those channels which are "alive", disable fragment shader
2787     * execution.
2788     */
2789    if (bld->exec_mask.has_mask) {
2790       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2791    }
2792    else {
2793       LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2794       mask = zero;
2795    }
2796 
2797    lp_build_mask_update(bld->mask, mask);
2798 
2799    if (!near_end_of_shader(bld, pc))
2800       lp_build_mask_check(bld->mask);
2801 }
2802 
2803 
2804 /**
2805  * Emit code which will dump the values of all registers in the given
2806  * register file to stdout.
2807  */
2808 static void
2809 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2810                unsigned file)
2811 {
2812    const struct tgsi_shader_info *info = bld->bld_base.info;
2813    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2814    LLVMBuilderRef builder = gallivm->builder;
2815    LLVMValueRef reg_ptr;
2816    int index;
2817    int max_index = info->file_max[file];
2818 
2819    /*
2820     * Some register files, particularly constants, can be very large,
2821     * and dumping everything could make this unusably slow.
2822     */
2823    max_index = MIN2(max_index, 32);
2824 
2825    for (index = 0; index <= max_index; index++) {
2826       LLVMValueRef res;
2827       unsigned mask;
2828       int chan;
2829 
2830       if (index < 8 * sizeof(unsigned) &&
2831           (info->file_mask[file] & (1u << index)) == 0)  {
2832          /* This was not declared. */
2833          continue;
2834       }
2835 
2836       if (file == TGSI_FILE_INPUT) {
2837          mask = info->input_usage_mask[index];
2838       } else {
2839          mask = TGSI_WRITEMASK_XYZW;
2840       }
2841 
2842       for (chan = 0; chan < 4; chan++) {
2843          if ((mask & (1 << chan)) == 0) {
2844             /* This channel is not used. */
2845             continue;
2846          }
2847 
2848          if (file == TGSI_FILE_CONSTANT) {
2849             struct tgsi_full_src_register reg;
2850             memset(&reg, 0, sizeof reg);
2851             reg.Register.File = file;
2852             reg.Register.Index = index;
2853             reg.Register.SwizzleX = 0;
2854             reg.Register.SwizzleY = 1;
2855             reg.Register.SwizzleZ = 2;
2856             reg.Register.SwizzleW = 3;
2857 
2858             res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2859             if (!res) {
2860                continue;
2861             }
2862          } else if (file == TGSI_FILE_INPUT) {
2863             res = bld->inputs[index][chan];
2864             if (!res) {
2865                continue;
2866             }
2867          } else if (file == TGSI_FILE_TEMPORARY) {
2868             reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2869             assert(reg_ptr);
2870             res = LLVMBuildLoad(builder, reg_ptr, "");
2871          } else if (file == TGSI_FILE_OUTPUT) {
2872             reg_ptr = lp_get_output_ptr(bld, index, chan);
2873             assert(reg_ptr);
2874             res = LLVMBuildLoad(builder, reg_ptr, "");
2875          } else {
2876             assert(0);
2877             continue;
2878          }
2879 
2880          emit_dump_reg(gallivm, file, index, chan, res);
2881       }
2882    }
2883 }
2884 
2885 
2886 
2887 void
2888 lp_emit_declaration_soa(
2889    struct lp_build_tgsi_context *bld_base,
2890    const struct tgsi_full_declaration *decl)
2891 {
2892    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2893    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2894    LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2895    const unsigned first = decl->Range.First;
2896    const unsigned last = decl->Range.Last;
2897    unsigned idx, i;
2898 
2899    assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2900 
2901    switch (decl->Declaration.File) {
2902    case TGSI_FILE_TEMPORARY:
2903       if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2904          assert(last < LP_MAX_INLINED_TEMPS);
2905          for (idx = first; idx <= last; ++idx) {
2906             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2907                bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2908          }
2909       }
2910       break;
2911 
2912    case TGSI_FILE_OUTPUT:
2913       if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2914          for (idx = first; idx <= last; ++idx) {
2915             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2916                bld->outputs[idx][i] = lp_build_alloca(gallivm,
2917                                                       vec_type, "output");
2918          }
2919       }
2920       break;
2921 
2922    case TGSI_FILE_ADDRESS:
2923       /* ADDR registers are only allocated with an integer LLVM IR type,
2924        * as they are guaranteed to always hold integer values.
2925        * XXX: Not sure if this exception is worthwhile (or the whole idea of
2926        * an ADDR register for that matter).
2927        */
2928       assert(last < LP_MAX_TGSI_ADDRS);
2929       for (idx = first; idx <= last; ++idx) {
2930          assert(idx < LP_MAX_TGSI_ADDRS);
2931          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2932             bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2933       }
2934       break;
2935 
2936    case TGSI_FILE_SAMPLER_VIEW:
2937       /*
2938        * The target stored here MUST match whatever is actually bound
2939        * in the sampler views (what about the return type?).
2940        */
2941       assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2942       for (idx = first; idx <= last; ++idx) {
2943          bld->sv[idx] = decl->SamplerView;
2944       }
2945       break;
2946 
2947    case TGSI_FILE_CONSTANT:
2948    {
2949       /*
2950        * We could trivially fetch the per-buffer pointer when fetching the
2951        * constant, relying on llvm to figure out it's always the same pointer
2952        * anyway. However, doing so results in a huge (more than a factor of
2953        * 10) slowdown in llvm compilation times for some (but not all) shaders
2954        * (more specifically, the IR optimization spends way more time in
2955        * DominatorTree::dominates), at least with llvm versions 3.1 and 3.3.
2956        */
2957       unsigned idx2D = decl->Dim.Index2D;
2958       LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2959       assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2960       bld->consts[idx2D] =
2961          lp_build_array_get(gallivm, bld->consts_ptr, index2D);
2962       bld->consts_sizes[idx2D] =
2963          lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
2964    }
2965    break;
2966    case TGSI_FILE_BUFFER:
2967    {
2968       unsigned idx = decl->Range.First;
2969       LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2970       assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2971       bld->ssbos[idx] =
2972          lp_build_array_get(gallivm, bld->ssbo_ptr, index);
2973       bld->ssbo_sizes[idx] =
2974          lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
2975 
2976    }
2977    break;
2978    case TGSI_FILE_MEMORY:
2979       break;
2980    default:
2981       /* don't need to declare other vars */
2982       break;
2983    }
2984 }
2985 
2986 
2987 void lp_emit_immediate_soa(
2988    struct lp_build_tgsi_context *bld_base,
2989    const struct tgsi_full_immediate *imm)
2990 {
2991    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2992    struct gallivm_state * gallivm = bld_base->base.gallivm;
2993    LLVMValueRef imms[4];
2994    unsigned i;
2995    const uint size = imm->Immediate.NrTokens - 1;
2996    assert(size <= 4);
2997    switch (imm->Immediate.DataType) {
2998    case TGSI_IMM_FLOAT32:
2999       for( i = 0; i < size; ++i )
3000          imms[i] =
3001                lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3002 
3003       break;
3004    case TGSI_IMM_FLOAT64:
3005    case TGSI_IMM_UINT64:
3006    case TGSI_IMM_INT64:
3007    case TGSI_IMM_UINT32:
3008       for( i = 0; i < size; ++i ) {
3009          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3010          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3011       }
3012 
3013       break;
3014    case TGSI_IMM_INT32:
3015       for( i = 0; i < size; ++i ) {
3016          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3017          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3018       }
3019 
3020       break;
3021    }
3022    for( i = size; i < 4; ++i )
3023       imms[i] = bld_base->base.undef;
3024 
3025    if (bld->use_immediates_array) {
3026       unsigned index = bld->num_immediates;
3027       struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3028       LLVMBuilderRef builder = gallivm->builder;
3029       LLVMValueRef gep[2];
3030       gep[0] = lp_build_const_int32(gallivm, 0);
3031 
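      /*
       * imms_array is a flat array of vectors: immediate N, channel c is
       * stored at element N * 4 + c.
       */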
3032       assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3033       for (i = 0; i < 4; ++i ) {
3034          gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3035          LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3036                                              bld->imms_array, gep, 2, "");
3037          LLVMBuildStore(builder, imms[i], imm_ptr);
3038       }
3039    } else {
3040       /* simply copy the immediate values into the next immediates[] slot */
3041       unsigned i;
3042       assert(imm->Immediate.NrTokens - 1 <= 4);
3043       assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3044 
3045       for(i = 0; i < 4; ++i )
3046          bld->immediates[bld->num_immediates][i] = imms[i];
3047 
3048       if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3049          unsigned index = bld->num_immediates;
3050          struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3051          LLVMBuilderRef builder = gallivm->builder;
3052          LLVMValueRef gep[2];
3053          gep[0] = lp_build_const_int32(gallivm, 0);
3054          for (i = 0; i < 4; ++i ) {
3055             gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3056             LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3057                                                 bld->imms_array, gep, 2, "");
3058             LLVMBuildStore(builder,
3059                            bld->immediates[index][i],
3060                            imm_ptr);
3061          }
3062       }
3063    }
3064 
3065    bld->num_immediates++;
3066 }
3067 
3068 static void
3069 ddx_emit(
3070    const struct lp_build_tgsi_action * action,
3071    struct lp_build_tgsi_context * bld_base,
3072    struct lp_build_emit_data * emit_data)
3073 {
3074    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3075 
3076    emit_fetch_deriv(bld, emit_data->args[0], NULL,
3077                     &emit_data->output[emit_data->chan], NULL);
3078 }
3079 
3080 static void
3081 ddy_emit(
3082    const struct lp_build_tgsi_action * action,
3083    struct lp_build_tgsi_context * bld_base,
3084    struct lp_build_emit_data * emit_data)
3085 {
3086    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3087 
3088    emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3089                     &emit_data->output[emit_data->chan]);
3090 }
3091 
3092 static void
3093 kill_emit(
3094    const struct lp_build_tgsi_action * action,
3095    struct lp_build_tgsi_context * bld_base,
3096    struct lp_build_emit_data * emit_data)
3097 {
3098    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3099 
3100    emit_kill(bld, bld_base->pc - 1);
3101 }
3102 
3103 static void
3104 kill_if_emit(
3105    const struct lp_build_tgsi_action * action,
3106    struct lp_build_tgsi_context * bld_base,
3107    struct lp_build_emit_data * emit_data)
3108 {
3109    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3110 
3111    emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3112 }
3113 
3114 static void
3115 tex_emit(
3116    const struct lp_build_tgsi_action * action,
3117    struct lp_build_tgsi_context * bld_base,
3118    struct lp_build_emit_data * emit_data)
3119 {
3120    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3121 
3122    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3123             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3124 }
3125 
3126 static void
3127 tex2_emit(
3128    const struct lp_build_tgsi_action * action,
3129    struct lp_build_tgsi_context * bld_base,
3130    struct lp_build_emit_data * emit_data)
3131 {
3132    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3133 
3134    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3135             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3136 }
3137 
3138 static void
3139 txb_emit(
3140    const struct lp_build_tgsi_action * action,
3141    struct lp_build_tgsi_context * bld_base,
3142    struct lp_build_emit_data * emit_data)
3143 {
3144    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3145 
3146    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3147             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3148 }
3149 
3150 static void
3151 txb2_emit(
3152    const struct lp_build_tgsi_action * action,
3153    struct lp_build_tgsi_context * bld_base,
3154    struct lp_build_emit_data * emit_data)
3155 {
3156    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3157 
3158    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3159             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3160 }
3161 
3162 static void
3163 txd_emit(
3164    const struct lp_build_tgsi_action * action,
3165    struct lp_build_tgsi_context * bld_base,
3166    struct lp_build_emit_data * emit_data)
3167 {
3168    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3169 
3170    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3171             emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3172 }
3173 
3174 static void
3175 txl_emit(
3176    const struct lp_build_tgsi_action * action,
3177    struct lp_build_tgsi_context * bld_base,
3178    struct lp_build_emit_data * emit_data)
3179 {
3180    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3181 
3182    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3183             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3184 }
3185 
3186 static void
3187 txl2_emit(
3188    const struct lp_build_tgsi_action * action,
3189    struct lp_build_tgsi_context * bld_base,
3190    struct lp_build_emit_data * emit_data)
3191 {
3192    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3193 
3194    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3195             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3196 }
3197 
3198 static void
3199 txp_emit(
3200    const struct lp_build_tgsi_action * action,
3201    struct lp_build_tgsi_context * bld_base,
3202    struct lp_build_emit_data * emit_data)
3203 {
3204    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3205 
3206    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3207             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3208 }
3209 
3210 static void
3211 tg4_emit(
3212    const struct lp_build_tgsi_action * action,
3213    struct lp_build_tgsi_context * bld_base,
3214    struct lp_build_emit_data * emit_data)
3215 {
3216    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3217 
3218    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3219             emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3220 }
3221 
3222 static void
3223 lodq_emit(
3224    const struct lp_build_tgsi_action * action,
3225    struct lp_build_tgsi_context * bld_base,
3226    struct lp_build_emit_data * emit_data)
3227 {
3228    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3229 
3230    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3231             emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3232 }
3233 
3234 static void
3235 txq_emit(
3236    const struct lp_build_tgsi_action * action,
3237    struct lp_build_tgsi_context * bld_base,
3238    struct lp_build_emit_data * emit_data)
3239 {
3240    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3241 
3242    emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3243 }
3244 
3245 static void
3246 txf_emit(
3247    const struct lp_build_tgsi_action * action,
3248    struct lp_build_tgsi_context * bld_base,
3249    struct lp_build_emit_data * emit_data)
3250 {
3251    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3252 
3253    emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3254 }
3255 
3256 static void
3257 sample_i_emit(
3258    const struct lp_build_tgsi_action * action,
3259    struct lp_build_tgsi_context * bld_base,
3260    struct lp_build_emit_data * emit_data)
3261 {
3262    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3263 
3264    emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3265 }
3266 
3267 static void
3268 sample_emit(
3269    const struct lp_build_tgsi_action * action,
3270    struct lp_build_tgsi_context * bld_base,
3271    struct lp_build_emit_data * emit_data)
3272 {
3273    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3274 
3275    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3276                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3277 }
3278 
3279 static void
3280 sample_b_emit(
3281    const struct lp_build_tgsi_action * action,
3282    struct lp_build_tgsi_context * bld_base,
3283    struct lp_build_emit_data * emit_data)
3284 {
3285    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3286 
3287    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3288                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3289 }
3290 
3291 static void
3292 sample_c_emit(
3293    const struct lp_build_tgsi_action * action,
3294    struct lp_build_tgsi_context * bld_base,
3295    struct lp_build_emit_data * emit_data)
3296 {
3297    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3298 
3299    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3300                TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3301 }
3302 
3303 static void
3304 sample_c_lz_emit(
3305    const struct lp_build_tgsi_action * action,
3306    struct lp_build_tgsi_context * bld_base,
3307    struct lp_build_emit_data * emit_data)
3308 {
3309    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3310 
3311    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3312                TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3313 }
3314 
3315 static void
3316 sample_d_emit(
3317    const struct lp_build_tgsi_action * action,
3318    struct lp_build_tgsi_context * bld_base,
3319    struct lp_build_emit_data * emit_data)
3320 {
3321    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3322 
3323    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3324                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3325 }
3326 
3327 static void
3328 sample_l_emit(
3329    const struct lp_build_tgsi_action * action,
3330    struct lp_build_tgsi_context * bld_base,
3331    struct lp_build_emit_data * emit_data)
3332 {
3333    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3334 
3335    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3336                FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3337 }
3338 
3339 static void
3340 gather4_emit(
3341    const struct lp_build_tgsi_action * action,
3342    struct lp_build_tgsi_context * bld_base,
3343    struct lp_build_emit_data * emit_data)
3344 {
3345    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3346 
3347    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3348                FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3349 }
3350 
3351 static void
3352 sviewinfo_emit(
3353    const struct lp_build_tgsi_action * action,
3354    struct lp_build_tgsi_context * bld_base,
3355    struct lp_build_emit_data * emit_data)
3356 {
3357    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3358 
3359    emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3360 }
3361 
3362 static void
3363 lod_emit(
3364    const struct lp_build_tgsi_action * action,
3365    struct lp_build_tgsi_context * bld_base,
3366    struct lp_build_emit_data * emit_data)
3367 {
3368    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3369 
3370    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3371                FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3372 }
3373 
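/*
 * Map a TGSI texture target to the number of coordinate dimensions and,
 * for array targets, which source component carries the layer index
 * (0 means no layer).  E.g. TGSI_TEXTURE_2D_ARRAY uses 2 dims with the
 * layer in component 2.
 */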
3374 static void target_to_dims_layer(unsigned target,
3375                                  unsigned *dims,
3376                                  unsigned *layer_coord)
3377 {
3378    *layer_coord = 0;
3379    switch (target) {
3380    case TGSI_TEXTURE_1D:
3381    case TGSI_TEXTURE_BUFFER:
3382       *dims = 1;
3383       break;
3384    case TGSI_TEXTURE_1D_ARRAY:
3385       *layer_coord = 1;
3386       *dims = 1;
3387       break;
3388    case TGSI_TEXTURE_2D:
3389    case TGSI_TEXTURE_RECT:
3390       *dims = 2;
3391       break;
3392    case TGSI_TEXTURE_2D_ARRAY:
3393       *layer_coord = 2;
3394       *dims = 2;
3395       break;
3396    case TGSI_TEXTURE_3D:
3397    case TGSI_TEXTURE_CUBE:
3398    case TGSI_TEXTURE_CUBE_ARRAY:
3399       *dims = 3;
3400       break;
3401    default:
3402       assert(0);
3403       *dims = 0;
3404       return;
3405    }
3406 }
3407 
3408 static void
3409 img_load_emit(
3410    const struct lp_build_tgsi_action * action,
3411    struct lp_build_tgsi_context * bld_base,
3412    struct lp_build_emit_data * emit_data)
3413 {
3414    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3415    struct lp_img_params params;
3416    LLVMValueRef coords[5];
3417    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3418    unsigned dims;
3419    unsigned target = emit_data->inst->Memory.Texture;
3420    unsigned layer_coord;
3421 
3422    target_to_dims_layer(target, &dims, &layer_coord);
3423 
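   /*
    * Pack the coordinates: components 0..dims-1 come from the address
    * operand and unused slots stay undef; for array targets the layer
    * index (component 1 or 2 of the operand) always lands in coords[2].
    */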
3424    for (unsigned i = 0; i < dims; i++) {
3425       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3426    }
3427    for (unsigned i = dims; i < 5; i++) {
3428       coords[i] = coord_undef;
3429    }
3430    if (layer_coord)
3431       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3432 
3433    memset(&params, 0, sizeof(params));
3434 
3435    params.type = bld->bld_base.base.type;
3436    params.context_ptr = bld->context_ptr;
3437    params.thread_data_ptr = bld->thread_data_ptr;
3438    params.coords = coords;
3439    params.outdata = emit_data->output;
3440    params.target = tgsi_to_pipe_tex_target(target);
3441    params.image_index = emit_data->inst->Src[0].Register.Index;
3442    params.img_op = LP_IMG_LOAD;
3443    bld->image->emit_op(bld->image,
3444                          bld->bld_base.base.gallivm,
3445                          &params);
3446 }
3447 
3448 static void
3449 load_emit(
3450    const struct lp_build_tgsi_action * action,
3451    struct lp_build_tgsi_context * bld_base,
3452    struct lp_build_emit_data * emit_data)
3453 {
3454    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3455    struct gallivm_state * gallivm = bld_base->base.gallivm;
3456    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3457    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3458    unsigned buf = bufreg->Register.Index;
3459    assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
3460           bufreg->Register.File == TGSI_FILE_IMAGE ||
3461           bufreg->Register.File == TGSI_FILE_MEMORY ||
3462           bufreg->Register.File == TGSI_FILE_CONSTBUF);
3463    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3464    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3465 
3466    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3467       img_load_emit(action, bld_base, emit_data);
3468    } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
3469       LLVMValueRef consts_ptr = bld->consts[buf];
3470       LLVMValueRef num_consts = bld->consts_sizes[buf];
3471 
3472       LLVMValueRef indirect_index;
3473       LLVMValueRef overflow_mask;
3474 
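      /*
       * The LOAD offset operand is a byte offset: >> 4 turns it into a
       * vec4 index (16 bytes per vec4), and the << 2 further down turns
       * that into a float element index (vec4 index * 4 + channel).
       */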
3475       indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
3476       indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);
3477 
3478       /* All fetches are from the same constant buffer, so
3479        * we need to propagate the size to a vector to do a
3480        * vector comparison */
3481       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
3482 
3483       /* Gather values from the constant buffer */
3484       unsigned chan_index;
3485       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3486          /* Construct a boolean vector telling us which channels
3487           * overflow the bound constant buffer */
3488          overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
3489                                           indirect_index, num_consts);
3490 
3491          /* index_vec = indirect_index * 4 */
3492          LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
3493          index_vec = lp_build_add(uint_bld, index_vec,
3494                                   lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3495 
3496          emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
3497       }
3498    } else if (0) {
3499       /* for indirect support with ARB_gpu_shader5 */
3500    } else {
3501       LLVMValueRef index;
3502       LLVMValueRef scalar, scalar_ptr;
3503       unsigned chan_index;
3504 
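      /*
       * SSBO / shared-memory loads: the byte offset is converted to a
       * dword index (>> 2) and, for SSBOs, checked against the buffer
       * size (also converted to dwords) so out-of-bounds lanes read
       * back 0.
       */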
3505       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3506       index = lp_build_shr_imm(uint_bld, index, 2);
3507 
3508       scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3509 
3510       LLVMValueRef ssbo_limit = NULL;
3511 
3512       if (!is_shared) {
3513          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3514          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3515       }
3516 
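      /*
       * There is no masked gather here: each enabled destination channel
       * is read one SIMD lane at a time inside an LLVM loop, guarded by
       * the execution mask ANDed with the SSBO bounds check, and the
       * scalars are reassembled into a result vector.
       */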
3517       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3518          LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3519 
3520          LLVMValueRef exec_mask = mask_vec(bld_base);
3521          if (!is_shared) {
3522             LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3523             exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3524          }
3525 
3526          LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
3527          struct lp_build_loop_state loop_state;
3528          lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3529 
3530          struct lp_build_if_state ifthen;
3531          LLVMValueRef cond, temp_res;
3532 
3533          loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3534                                               loop_state.counter, "");
3535 
3536          cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3537          cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3538 
3539          lp_build_if(&ifthen, gallivm, cond);
3540          scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
3541 
3542          temp_res = LLVMBuildLoad(builder, result, "");
3543          temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3544          LLVMBuildStore(builder, temp_res, result);
3545          lp_build_else(&ifthen);
3546          temp_res = LLVMBuildLoad(builder, result, "");
3547          temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3548          LLVMBuildStore(builder, temp_res, result);
3549          lp_build_endif(&ifthen);
3550          lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3551                                 NULL, LLVMIntUGE);
3552          emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
3553       }
3554    }
3555 }
3556 
3557 static void
3558 img_store_emit(
3559    const struct lp_build_tgsi_action * action,
3560    struct lp_build_tgsi_context * bld_base,
3561    struct lp_build_emit_data * emit_data)
3562 {
3563    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3564    struct lp_img_params params;
3565    LLVMValueRef coords[5];
3566    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3567    unsigned dims;
3568    unsigned target = emit_data->inst->Memory.Texture;
3569    unsigned layer_coord;
3570 
3571    target_to_dims_layer(target, &dims, &layer_coord);
3572    for (unsigned i = 0; i < dims; i++) {
3573       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3574    }
3575    for (unsigned i = dims; i < 5; i++) {
3576       coords[i] = coord_undef;
3577    }
3578    if (layer_coord)
3579       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3580    memset(&params, 0, sizeof(params));
3581 
3582    params.type = bld->bld_base.base.type;
3583    params.context_ptr = bld->context_ptr;
3584    params.thread_data_ptr = bld->thread_data_ptr;
3585    params.coords = coords;
3586    params.outdata = NULL;
3587    params.exec_mask = mask_vec(bld_base);
3588    params.target = tgsi_to_pipe_tex_target(target);
3589    params.image_index = emit_data->inst->Dst[0].Register.Index;
3590    params.img_op = LP_IMG_STORE;
3591    for (unsigned i = 0; i < 4; i++)
3592       params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3593 
3594    bld->image->emit_op(bld->image,
3595                        bld->bld_base.base.gallivm,
3596                        &params);
3597 }
3598 
3599 static void
3600 store_emit(
3601    const struct lp_build_tgsi_action * action,
3602    struct lp_build_tgsi_context * bld_base,
3603    struct lp_build_emit_data * emit_data)
3604 {
3605    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3606    struct gallivm_state * gallivm = bld_base->base.gallivm;
3607    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3608    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3609    const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3610    unsigned buf = bufreg->Register.Index;
3611    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3612    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3613 
3614    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3615       img_store_emit(action, bld_base, emit_data);
3616    } else if (0) {
3617 
3618    } else {
3619       LLVMValueRef index;  /* index into the buffer */
3620       LLVMValueRef scalar_ptr;
3621       LLVMValueRef value;
3622       unsigned chan_index;
3623 
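      /*
       * Same scheme as the load path: byte offset >> 2 gives a dword
       * index, SSBO writes are bounds-checked against the buffer size,
       * and each lane is stored scalarly under the execution mask.
       */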
3624       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3625       index = lp_build_shr_imm(uint_bld, index, 2);
3626 
3627       scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3628 
3629       LLVMValueRef ssbo_limit = NULL;
3630 
3631       if (!is_shared) {
3632          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3633          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3634       }
3635 
3636       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3637          LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3638 
3639          value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3640 
3641          LLVMValueRef exec_mask = mask_vec(bld_base);
3642          if (!is_shared) {
3643             LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3644             exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3645          }
3646 
3647          struct lp_build_loop_state loop_state;
3648          lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3649 
3650          LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3651                                                           loop_state.counter, "");
3652          value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3653 
3654          struct lp_build_if_state ifthen;
3655          LLVMValueRef cond;
3656 
3657          loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3658                                               loop_state.counter, "");
3659 
3660          cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3661          cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3662          lp_build_if(&ifthen, gallivm, cond);
3663 
3664          lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3665 
3666          lp_build_endif(&ifthen);
3667          lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3668                                 NULL, LLVMIntUGE);
3669       }
3670    }
3671 }
3672 
3673 static void
3674 resq_emit(
3675    const struct lp_build_tgsi_action * action,
3676    struct lp_build_tgsi_context * bld_base,
3677    struct lp_build_emit_data * emit_data)
3678 {
3679    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3680    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3681    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3682 
3683    unsigned buf = bufreg->Register.Index;
3684    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3685 
3686    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3687       unsigned target = emit_data->inst->Memory.Texture;
3688       struct lp_sampler_size_query_params params = { 0 };
3689       params.int_type = bld->bld_base.int_bld.type;
3690       params.texture_unit = buf;
3691       params.target = tgsi_to_pipe_tex_target(target);
3692       params.context_ptr = bld->context_ptr;
3693       params.sizes_out = emit_data->output;
3694 
3695       bld->image->emit_size_query(bld->image,
3696                                   bld->bld_base.base.gallivm,
3697                                   &params);
3698    } else {
3699       LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3700 
3701       emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3702    }
3703 }
3704 
3705 static void
3706 img_atomic_emit(
3707    const struct lp_build_tgsi_action * action,
3708    struct lp_build_tgsi_context * bld_base,
3709    struct lp_build_emit_data * emit_data,
3710    LLVMAtomicRMWBinOp op)
3711 {
3712    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3713    struct lp_img_params params;
3714    LLVMValueRef coords[5];
3715    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3716    unsigned dims;
3717    unsigned layer_coord;
3718    unsigned target = emit_data->inst->Memory.Texture;
3719 
3720    target_to_dims_layer(target, &dims, &layer_coord);
3721 
3722    for (unsigned i = 0; i < dims; i++) {
3723       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3724    }
3725    for (unsigned i = dims; i < 5; i++) {
3726       coords[i] = coord_undef;
3727    }
3728    if (layer_coord)
3729       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3730    memset(&params, 0, sizeof(params));
3731 
3732    params.type = bld->bld_base.base.type;
3733    params.context_ptr = bld->context_ptr;
3734    params.thread_data_ptr = bld->thread_data_ptr;
3735    params.exec_mask = mask_vec(bld_base);
3736    params.image_index = emit_data->inst->Src[0].Register.Index;
3737    params.coords = coords;
3738    params.target = tgsi_to_pipe_tex_target(target);
3739    params.op = op;
3740    params.outdata = emit_data->output;
3741    params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3742 
3743    for (unsigned i = 0; i < 4; i++)
3744       params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3745    if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3746       for (unsigned i = 0; i < 4; i++)
3747          params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3748    }
3749    bld->image->emit_op(bld->image,
3750                        bld->bld_base.base.gallivm,
3751                        &params);
3752 }
3753 
3754 static void
3755 atomic_emit(
3756    const struct lp_build_tgsi_action * action,
3757    struct lp_build_tgsi_context * bld_base,
3758    struct lp_build_emit_data * emit_data)
3759 {
3760    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3761    struct gallivm_state * gallivm = bld_base->base.gallivm;
3762    LLVMBuilderRef builder = gallivm->builder;
3763    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3764    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3765 
3766    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3767    unsigned buf = bufreg->Register.Index;
3768    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3769 
3770    LLVMAtomicRMWBinOp op = -1;
3771    switch (emit_data->inst->Instruction.Opcode) {
3772    case TGSI_OPCODE_ATOMUADD:
3773       op = LLVMAtomicRMWBinOpAdd;
3774       break;
3775    case TGSI_OPCODE_ATOMXCHG:
3776       op = LLVMAtomicRMWBinOpXchg;
3777       break;
3778    case TGSI_OPCODE_ATOMAND:
3779       op = LLVMAtomicRMWBinOpAnd;
3780       break;
3781    case TGSI_OPCODE_ATOMOR:
3782       op = LLVMAtomicRMWBinOpOr;
3783       break;
3784    case TGSI_OPCODE_ATOMXOR:
3785       op = LLVMAtomicRMWBinOpXor;
3786       break;
3787    case TGSI_OPCODE_ATOMUMIN:
3788       op = LLVMAtomicRMWBinOpUMin;
3789       break;
3790    case TGSI_OPCODE_ATOMUMAX:
3791       op = LLVMAtomicRMWBinOpUMax;
3792       break;
3793    case TGSI_OPCODE_ATOMIMIN:
3794       op = LLVMAtomicRMWBinOpMin;
3795       break;
3796    case TGSI_OPCODE_ATOMIMAX:
3797       op = LLVMAtomicRMWBinOpMax;
3798       break;
3799    case TGSI_OPCODE_ATOMCAS:
3800       break;
3801    default:
3802       assert(0);
3803       return;
3804    }
3805 
3806    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3807       img_atomic_emit(action, bld_base, emit_data, op);
3808    } else if (0) {
3809    } else {
3810       LLVMValueRef index;  /* index into the buffer */
3811       LLVMValueRef scalar, scalar_ptr;
3812       LLVMValueRef value;
3813 
3814       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3815       value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3816 
3817       index = lp_build_shr_imm(uint_bld, index, 2);
3818 
3819       if (!is_shared) {
3820          index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3821          scalar_ptr = bld->ssbos[buf];
3822       } else
3823          scalar_ptr = bld->shared_ptr;
3824 
3825       LLVMValueRef atom_res = lp_build_alloca(gallivm,
3826                                               uint_bld->vec_type, "");
3827 
3828       LLVMValueRef ssbo_limit;
3829       if (!is_shared) {
3830          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3831          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3832       }
3833 
3834       LLVMValueRef exec_mask = mask_vec(bld_base);
3835 
3836       if (!is_shared) {
3837          LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3838          exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3839       }
3840 
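      /*
       * Atomics are also scalarized: loop over the SIMD lanes and, for
       * each active (and in-bounds) lane, issue a single atomicrmw -- or
       * a cmpxchg for ATOMCAS -- returning the old value; inactive lanes
       * get 0 in the result.
       */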
3841       struct lp_build_loop_state loop_state;
3842       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3843 
3844       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3845                                                        loop_state.counter, "");
3846       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3847 
3848       index = LLVMBuildExtractElement(gallivm->builder, index,
3849                                       loop_state.counter, "");
3850 
3851       scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3852                                 &index, 1, "");
3853 
3854       struct lp_build_if_state ifthen;
3855       LLVMValueRef cond, temp_res;
3856 
3857       cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3858       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3859       lp_build_if(&ifthen, gallivm, cond);
3860 
3861       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3862          LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3863          LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3864                                                             loop_state.counter, "");
3865          cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3866          scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3867                                          cas_src_ptr,
3868                                          LLVMAtomicOrderingSequentiallyConsistent,
3869                                          LLVMAtomicOrderingSequentiallyConsistent,
3870                                          false);
3871          scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3872       } else {
3873          scalar = LLVMBuildAtomicRMW(builder, op,
3874                                      scalar_ptr, value_ptr,
3875                                      LLVMAtomicOrderingSequentiallyConsistent,
3876                                      false);
3877       }
3878       temp_res = LLVMBuildLoad(builder, atom_res, "");
3879       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3880       LLVMBuildStore(builder, temp_res, atom_res);
3881       lp_build_else(&ifthen);
3882       temp_res = LLVMBuildLoad(builder, atom_res, "");
3883       temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3884       LLVMBuildStore(builder, temp_res, atom_res);
3885       lp_build_endif(&ifthen);
3886 
3887       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3888                              NULL, LLVMIntUGE);
3889       emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
3890    }
3891 }
3892 
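/*
 * BARRIER is implemented with the coroutine machinery: the current
 * invocation suspends here and (presumably) the dispatch loop only resumes
 * the coroutines once every invocation in the work-group has reached its
 * suspend point, after which execution continues in "resume".
 */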
3893 static void
3894 barrier_emit(
3895    const struct lp_build_tgsi_action * action,
3896    struct lp_build_tgsi_context * bld_base,
3897    struct lp_build_emit_data * emit_data)
3898 {
3899    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3900    struct gallivm_state * gallivm = bld_base->base.gallivm;
3901 
3902    LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3903 
3904    lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3905    LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3906 }
3907 
3908 static void
3909 membar_emit(
3910    const struct lp_build_tgsi_action * action,
3911    struct lp_build_tgsi_context * bld_base,
3912    struct lp_build_emit_data * emit_data)
3913 {
3914    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3915    LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3916 }
3917 
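/*
 * Add 1 to *ptr in every lane where the mask is set.  Note the
 * subtraction: active mask lanes are ~0, i.e. -1 as a signed integer, so
 * subtracting the mask increments exactly those lanes.  E.g. with
 * mask = {-1, 0, -1, 0} a counter of {3, 3, 3, 3} becomes {4, 3, 4, 3}.
 */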
3918 static void
3919 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3920                           LLVMValueRef ptr,
3921                           LLVMValueRef mask)
3922 {
3923    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3924    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3925 
3926    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3927 
3928    LLVMBuildStore(builder, current_vec, ptr);
3929 }
3930 
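/*
 * Reset *ptr to zero in the lanes selected by the mask, leaving the other
 * lanes untouched.
 */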
3931 static void
3932 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3933                              LLVMValueRef ptr,
3934                              LLVMValueRef mask)
3935 {
3936    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3937    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3938 
3939    current_vec = lp_build_select(&bld_base->uint_bld,
3940                                  mask,
3941                                  bld_base->uint_bld.zero,
3942                                  current_vec);
3943 
3944    LLVMBuildStore(builder, current_vec, ptr);
3945 }
3946 
3947 static LLVMValueRef
3948 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3949                                   LLVMValueRef current_mask_vec,
3950                                   LLVMValueRef total_emitted_vertices_vec)
3951 {
3952    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3953    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3954    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3955                                         total_emitted_vertices_vec,
3956                                         bld->max_output_vertices_vec);
3957 
3958    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3959 }
3960 
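/*
 * Emit one GS vertex per active lane: the execution mask is first clamped
 * so no lane exceeds the declared maximum output vertex count, the current
 * outputs are gathered and handed to the GS interface, and the per-lane
 * vertex counters are bumped via the mask-subtraction trick above.
 */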
3961 static void
3962 emit_vertex(
3963    const struct lp_build_tgsi_action * action,
3964    struct lp_build_tgsi_context * bld_base,
3965    struct lp_build_emit_data * emit_data)
3966 {
3967    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3968    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3969 
3970    if (bld->gs_iface->emit_vertex) {
3971       LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
3972                                                     TGSI_TYPE_UNSIGNED,
3973                                                     emit_data->inst->Src[0].Register.SwizzleX);
3974       LLVMValueRef mask = mask_vec(bld_base);
3975       LLVMValueRef total_emitted_vertices_vec =
3976          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3977 
3978       mask = clamp_mask_to_max_output_vertices(bld, mask,
3979                                                total_emitted_vertices_vec);
3980       gather_outputs(bld);
3981       bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
3982                                  bld->outputs,
3983                                  total_emitted_vertices_vec,
3984                                  mask,
3985                                  stream_id);
3986       increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3987                                 mask);
3988       increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3989                                 mask);
3990 #if DUMP_GS_EMITS
3991       lp_build_print_value(bld->bld_base.base.gallivm,
3992                            " +++ emit vertex masked ones = ",
3993                            mask);
3994       lp_build_print_value(bld->bld_base.base.gallivm,
3995                            " +++ emit vertex emitted = ",
3996                            total_emitted_vertices_vec);
3997 #endif
3998    }
3999 }
4000 
4001 
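/*
 * Finish the current primitive for the lanes selected by 'mask': feed the
 * accumulated vertex/primitive counters to the GS interface, then bump the
 * primitive counter and reset the per-primitive vertex counter.
 */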
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* Combine the current execution mask with the mask of execution
       * slots that actually have unemitted vertices, so that end_primitive
       * only takes effect on the paths with unflushed vertices.
       */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
                                   total_emitted_vertices_vec,
                                   emitted_vertices_vec,
                                   emitted_prims_vec,
                                   mask_vec(bld_base), 0);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}

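/*
 * TGSI ENDPRIM: end the current primitive for all currently active lanes.
 */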
static void
end_primitive(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->gs_iface->end_primitive) {
      LLVMValueRef mask = mask_vec(bld_base);
      end_primitive_masked(bld_base, mask);
   }
}

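/*
 * TGSI BARRIER for tessellation control shaders: forwarded to the TCS
 * interface.
 */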
static void
barrier_emit_tcs(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->tcs_iface->emit_barrier) {
      bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
   }
}


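/*
 * Handlers for the TGSI control-flow opcodes (CAL/RET, BRK/CONT,
 * IF/UIF/ELSE/ENDIF, SWITCH/CASE/DEFAULT/ENDSWITCH, BGNLOOP/ENDLOOP,
 * BGNSUB/ENDSUB).  They delegate to the execution-mask helpers, which
 * update the per-lane mask and, where needed, the program counter.
 */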
static void
cal_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
                     &bld_base->pc);
}

static void
ret_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
}

static void
brk_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_tgsi_break(&bld->exec_mask, bld_base);
}

static void
if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], bld->bld_base.base.zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
uif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], uint_bld->zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
}

static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_default(&bld->exec_mask, bld_base);
}

static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
}

static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endswitch(&bld->exec_mask, bld_base);
}

static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask, true);
}

static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}

static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}

static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}

static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}

static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}

static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}

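/*
 * Emit code that must run before the shader body: allocate the arrays
 * backing indirectly addressed register files and the GS emit counters,
 * and optionally dump the constant/input files when DEBUG_EXECUTION is set.
 */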
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
      bld->temps_array = lp_build_alloca_undef(gallivm,
                                               LLVMArrayType(bld_base->base.vec_type, array_size),
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
      bld->imms_array = lp_build_alloca_undef(gallivm,
                                              LLVMArrayType(bld_base->base.vec_type, array_size),
                                              "imms_array");
   }

   /* If we have indirect addressing in the inputs, copy them into our
    * alloca array so that we can iterate over them. */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
       !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}

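/*
 * Hook that runs after the declarations have been processed; currently only
 * used to let the TCS interface emit its own prologue.
 */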
static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
      bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
   }
}

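/*
 * Emit code that must run after the shader body: flush any pending GS
 * vertices (or gather the outputs for the other shader stages) and give
 * the TCS interface a chance to emit its epilogue.
 */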
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
      bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
   }

   /* For geometry shaders, flush any unfinished primitive and call the GS
    * epilogue; otherwise copy the output values (possibly held in an alloca
    * array when indirectly addressed) into the output slots specified by
    * the caller. */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
       * vertices in the cache.  Note that we must not call end_primitive
       * here since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec, 0);
   } else {
      gather_outputs(bld);
   }
}

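/**
 * Translate a TGSI shader into LLVM IR, operating on SoA vectors of the
 * given type.  Sets up the SoA build context, registers the per-opcode
 * emit callbacks and the GS/TCS/TES interfaces supplied in 'params', then
 * walks the token stream with lp_build_tgsi_llvm().
 */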
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  const struct lp_build_tgsi_params *params,
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
{
   struct lp_build_tgsi_soa_context bld;
   struct lp_type type = params->type;
   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = params->mask;
   bld.inputs = params->inputs;
   bld.outputs = outputs;
   bld.consts_ptr = params->consts_ptr;
   bld.const_sizes_ptr = params->const_sizes_ptr;
   bld.ssbo_ptr = params->ssbo_ptr;
   bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
   bld.sampler = params->sampler;
   bld.bld_base.info = params->info;
   bld.indirect_files = params->info->indirect_files;
   bld.context_ptr = params->context_ptr;
   bld.thread_data_ptr = params->thread_data_ptr;
   bld.image = params->image;
   bld.shared_ptr = params->shared_ptr;
   bld.coro = params->coro;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * them like indirect temporaries.
    */
   if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons immediates are always backed by a static
    * array, but if there are too many of them we have to fall back to
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
         (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }


   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;

   bld.bld_base.emit_store = emit_store;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;

   if (params->gs_iface) {
      /* The GS_MAX_OUTPUT_VERTICES property should always be set, but apps
       * using ext_geometry_shader4 quite often forget to set it, so fall
       * back to MAX_VERTEX_VARYING from that spec rather than asserting. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = params->gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
         params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   if (params->tes_iface) {
      /* inputs are always indirect with tes */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.tes_iface = params->tes_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
   }

   if (params->tcs_iface) {
      bld.tcs_iface = params->tcs_iface;
      /* outputs and inputs are always indirect with tcs */
      bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
      bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *params->system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
   lp_exec_mask_fini(&bld.exec_mask);
}