1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "pipe/p_config.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_prim.h"
45 #include "tgsi/tgsi_dump.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_info.h"
48 #include "tgsi/tgsi_parse.h"
49 #include "tgsi/tgsi_util.h"
50 #include "tgsi/tgsi_scan.h"
51 #include "tgsi/tgsi_strings.h"
52 #include "lp_bld_tgsi_action.h"
53 #include "lp_bld_type.h"
54 #include "lp_bld_const.h"
55 #include "lp_bld_arit.h"
56 #include "lp_bld_bitarit.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_init.h"
59 #include "lp_bld_logic.h"
60 #include "lp_bld_misc.h"
61 #include "lp_bld_swizzle.h"
62 #include "lp_bld_flow.h"
63 #include "lp_bld_coro.h"
64 #include "lp_bld_quad.h"
65 #include "lp_bld_tgsi.h"
66 #include "lp_bld_limits.h"
67 #include "lp_bld_debug.h"
68 #include "lp_bld_printf.h"
69 #include "lp_bld_sample.h"
70 #include "lp_bld_struct.h"
71
72 #define DUMP_GS_EMITS 0
73
74 /*
75 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
76 * instruction.
77 *
78 * TODO:
79 * - take execution masks in consideration
80 * - debug control-flow instructions
81 */
82 #define DEBUG_EXECUTION 0
83
84
85 /*
86 * Emit code to print a register value.
87 */
88 static void
emit_dump_reg(struct gallivm_state * gallivm,unsigned file,unsigned index,unsigned chan,LLVMValueRef value)89 emit_dump_reg(struct gallivm_state *gallivm,
90 unsigned file,
91 unsigned index,
92 unsigned chan,
93 LLVMValueRef value)
94 {
95 char buf[32];
96
97 snprintf(buf, sizeof buf, " %s[%u].%c = ",
98 tgsi_file_name(file),
99 index, "xyzw"[chan]);
100
101 lp_build_print_value(gallivm, buf, value);
102 }
103
104 static inline struct function_ctx *
func_ctx(struct lp_exec_mask * mask)105 func_ctx(struct lp_exec_mask *mask)
106 {
107 assert(mask->function_stack_size > 0);
108 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
109 return &mask->function_stack[mask->function_stack_size - 1];
110 }
111
112 /*
113 * combine the execution mask if there is one with the current mask.
114 */
115 static LLVMValueRef
mask_vec(struct lp_build_tgsi_context * bld_base)116 mask_vec(struct lp_build_tgsi_context *bld_base)
117 {
118 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
119 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
120 struct lp_exec_mask *exec_mask = &bld->exec_mask;
121 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
122 if (!exec_mask->has_mask) {
123 return bld_mask;
124 }
125 if (!bld_mask)
126 return exec_mask->exec_mask;
127 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
128 exec_mask->exec_mask, "");
129 }
130
lp_exec_tgsi_break(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base)131 static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
132 struct lp_build_tgsi_context * bld_base)
133 {
134 enum tgsi_opcode opcode =
135 bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
136 bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
137 opcode == TGSI_OPCODE_CASE);
138 lp_exec_break(mask, &bld_base->pc, break_always);
139 }
140
/*
 * Open a SWITCH statement: save the current switch state on the per-function
 * stack and start with an all-zero match mask.
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);
   int top;

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      /* Too deeply nested -- only track the depth so ENDSWITCH can unwind. */
      ctx->switch_stack_size++;
      return;
   }

   /* Remember the enclosing break type; breaks now target this switch. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   /* Push the in-progress switch state. */
   top = ctx->switch_stack_size++;
   ctx->switch_stack[top].switch_mask = mask->switch_mask;
   ctx->switch_stack[top].switch_val = ctx->switch_val;
   ctx->switch_stack[top].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[top].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[top].switch_pc = ctx->switch_pc;

   /* Fresh switch: no channel matches any case yet. */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
171
/*
 * Close a SWITCH statement: if a default block was deferred (recorded in
 * ctx->switch_pc by lp_exec_default), jump back and execute it now;
 * otherwise pop the saved switch state and restore the break type.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* Over-nested switch: lp_exec_switch only bumped the counter, undo that. */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's deferred default if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* default executes on channels NOT claimed by any case */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      /* rewind the pc to the skipped default block and re-execute it */
      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }

   else if (ctx->switch_pc && ctx->switch_in_default) {
      /* just returned here from executing the deferred default block */
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   /* pop the saved switch state pushed by lp_exec_switch */
   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   /* restore the break type of the enclosing construct */
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
223
/*
 * Emit a CASE label: enable channels whose switch value equals caseval,
 * keeping channels already enabled (fallthrough).
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef outer_mask, match;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (ctx->switch_in_default)
      return;

   outer_mask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
   match = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);

   /* channels matching any case never fall into default */
   ctx->switch_mask_default = LLVMBuildOr(builder, match,
                                          ctx->switch_mask_default, "sw_default_mask");
   /* fallthrough: keep channels that were already executing */
   match = LLVMBuildOr(builder, match, mask->switch_mask, "");
   mask->switch_mask = LLVMBuildAnd(builder, match, outer_mask, "sw_mask");

   lp_exec_mask_update(mask);
}
248
249 /*
250 * Analyse default statement in a switch.
251 * \return true if default is last statement, false otherwise
252 * \param default_pc_start contains pc of instruction to jump to
253 * if default wasn't last but there's no
254 * fallthrough into default.
255 */
default_analyse_is_last(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base,int * default_pc_start)256 static boolean default_analyse_is_last(struct lp_exec_mask *mask,
257 struct lp_build_tgsi_context * bld_base,
258 int *default_pc_start)
259 {
260 unsigned pc = bld_base->pc;
261 struct function_ctx *ctx = func_ctx(mask);
262 int curr_switch_stack = ctx->switch_stack_size;
263
264 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
265 return false;
266 }
267
268 /* skip over case statements which are together with default */
269 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
270 pc++;
271 }
272
273 while (pc != ~0u && pc < bld_base->num_instructions) {
274 enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
275 switch (opcode) {
276 case TGSI_OPCODE_CASE:
277 if (curr_switch_stack == ctx->switch_stack_size) {
278 *default_pc_start = pc - 1;
279 return false;
280 }
281 break;
282 case TGSI_OPCODE_SWITCH:
283 curr_switch_stack++;
284 break;
285 case TGSI_OPCODE_ENDSWITCH:
286 if (curr_switch_stack == ctx->switch_stack_size) {
287 *default_pc_start = pc - 1;
288 return true;
289 }
290 curr_switch_stack--;
291 break;
292 default:
293 ; /* nothing */
294 }
295 pc++;
296 }
297 /* should never arrive here */
298 assert(0);
299 return true;
300 }
301
/*
 * Emit a DEFAULT label. If default is the last label in the switch the mask
 * can be updated immediately; otherwise execution of the default body is
 * deferred (recorded via ctx->switch_pc) and handled at ENDSWITCH time.
 */
static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc = 0;
   boolean default_is_last;

   /* over-nested switch: skipped entirely (see lp_exec_switch) */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not be always at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is last statement in switch (note that case statements appearing
    * "at the same time" as default don't change that) everything is just fine,
    * update switch mask and go on. This means we can handle default with
    * fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* default runs on channels no case claimed */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      /* plus channels falling through from the preceding label */
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      enum tgsi_opcode opcode =
         bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not last statement and there was no fallthrough into it,
       * we record the PC and continue execution at next case (again, those
       * case encountered at the same time don't count). At endswitch
       * time, we update switchmask, and go back executing the code we skipped
       * until the next break (possibly re-executing some code with changed mask
       * if there was a fallthrough out of default).
       * Finally, if it is not last statement and there was a fallthrough into it,
       * do the same as with the former case, except instead of skipping the code
       * just execute it without updating the mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
367
368
lp_exec_mask_call(struct lp_exec_mask * mask,int func,int * pc)369 static void lp_exec_mask_call(struct lp_exec_mask *mask,
370 int func,
371 int *pc)
372 {
373 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
374 return;
375 }
376
377 lp_exec_mask_function_init(mask, mask->function_stack_size);
378 mask->function_stack[mask->function_stack_size].pc = *pc;
379 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
380 mask->function_stack_size++;
381 *pc = func;
382 }
383
lp_exec_mask_ret(struct lp_exec_mask * mask,int * pc)384 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
385 {
386 LLVMBuilderRef builder = mask->bld->gallivm->builder;
387 struct function_ctx *ctx = func_ctx(mask);
388 LLVMValueRef exec_mask;
389
390 if (ctx->cond_stack_size == 0 &&
391 ctx->loop_stack_size == 0 &&
392 ctx->switch_stack_size == 0 &&
393 mask->function_stack_size == 1) {
394 /* returning from main() */
395 *pc = -1;
396 return;
397 }
398
399 if (mask->function_stack_size == 1) {
400 /*
401 * This requires special handling since we need to ensure
402 * we don't drop the mask even if we have no call stack
403 * (e.g. after a ret in a if clause after the endif)
404 */
405 mask->ret_in_main = TRUE;
406 }
407
408 exec_mask = LLVMBuildNot(builder,
409 mask->exec_mask,
410 "ret");
411
412 mask->ret_mask = LLVMBuildAnd(builder,
413 mask->ret_mask,
414 exec_mask, "ret_full");
415
416 lp_exec_mask_update(mask);
417 }
418
/*
 * BGNSUB is intentionally a no-op: the call/return bookkeeping is done in
 * lp_exec_mask_call() and lp_exec_mask_endsub().
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
422
lp_exec_mask_endsub(struct lp_exec_mask * mask,int * pc)423 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424 {
425 struct function_ctx *ctx;
426
427 assert(mask->function_stack_size > 1);
428 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
429
430 ctx = func_ctx(mask);
431 mask->function_stack_size--;
432
433 *pc = ctx->pc;
434 mask->ret_mask = ctx->ret_mask;
435
436 lp_exec_mask_update(mask);
437 }
438
439
440 static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context * bld,unsigned file,int index,unsigned chan)441 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
442 unsigned file,
443 int index,
444 unsigned chan)
445 {
446 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
447 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
448 LLVMValueRef var_of_array;
449
450 switch (file) {
451 case TGSI_FILE_TEMPORARY:
452 array_of_vars = bld->temps;
453 var_of_array = bld->temps_array;
454 break;
455 case TGSI_FILE_OUTPUT:
456 array_of_vars = bld->outputs;
457 var_of_array = bld->outputs_array;
458 break;
459 default:
460 assert(0);
461 return NULL;
462 }
463
464 assert(chan < 4);
465
466 if (bld->indirect_files & (1 << file)) {
467 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
468 if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
469 LLVMValueRef gep[2];
470 gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
471 gep[1] = lindex;
472 return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
473 } else {
474 return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
475 }
476 }
477 else {
478 assert(index <= bld->bld_base.info->file_max[file]);
479 return array_of_vars[index][chan];
480 }
481 }
482
483
484 /**
485 * Return pointer to a temporary register channel (src or dest).
486 * Note that indirect addressing cannot be handled here.
487 * \param index which temporary register
488 * \param chan which channel of the temp register.
489 */
490 LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context * bld,unsigned index,unsigned chan)491 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
492 unsigned index,
493 unsigned chan)
494 {
495 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
496 }
497
498 /**
499 * Return pointer to a output register channel (src or dest).
500 * Note that indirect addressing cannot be handled here.
501 * \param index which output register
502 * \param chan which channel of the output register.
503 */
504 LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context * bld,unsigned index,unsigned chan)505 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
506 unsigned index,
507 unsigned chan)
508 {
509 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
510 }
511
512 /*
513 * If we have indirect addressing in outputs copy our alloca array
514 * to the outputs slots specified by the caller to make sure
515 * our outputs are delivered consistently via the same interface.
516 */
517 static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)518 gather_outputs(struct lp_build_tgsi_soa_context * bld)
519 {
520 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
521 unsigned index, chan;
522 assert(bld->bld_base.info->num_outputs <=
523 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
524 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
525 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
526 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
527 }
528 }
529 }
530 }
531
532 /**
533 * Gather vector.
534 * XXX the lp_build_gather() function should be capable of doing this
535 * with a little work.
536 */
/*
 * \param base_ptr       scalar base pointer into the buffer to gather from
 * \param indexes        vector of element indices to fetch
 * \param overflow_mask  optional per-channel out-of-bounds mask (result 0)
 * \param indexes2       optional second index vector; when present the two
 *                       index vectors are interleaved to build a
 *                       double-length float vector (for 64-bit fetches)
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res;
   unsigned i;

   /* 64-bit gathers produce twice as many 32-bit lanes */
   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      /* di = destination lane; si = source lane (halved when interleaving) */
      di = lp_build_const_int32(bld->gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(bld->gallivm, i >> 1);
      else
         si = di;

      /* odd destination lanes come from indexes2 (high half of 64-bit) */
      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         /* zero out overflowed channels at 64-bit granularity */
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
620
621
622 /**
623 * Scatter/store vector.
624 */
625 static void
emit_mask_scatter(struct lp_build_tgsi_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)626 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
627 LLVMValueRef base_ptr,
628 LLVMValueRef indexes,
629 LLVMValueRef values,
630 struct lp_exec_mask *mask)
631 {
632 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
633 LLVMBuilderRef builder = gallivm->builder;
634 unsigned i;
635 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
636
637 /*
638 * Loop over elements of index_vec, store scalar value.
639 */
640 for (i = 0; i < bld->bld_base.base.type.length; i++) {
641 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
642 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
643 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
644 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
645 LLVMValueRef scalar_pred = pred ?
646 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
647
648 if (0)
649 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
650 ii, val, index, scalar_ptr);
651
652 if (scalar_pred) {
653 LLVMValueRef real_val, dst_val;
654 dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
655 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
656 LLVMBuildStore(builder, real_val, scalar_ptr);
657 }
658 else {
659 LLVMBuildStore(builder, val, scalar_ptr);
660 }
661 }
662 }
663
664
665 /**
666 * Read the current value of the ADDR register, convert the floats to
667 * ints, add the base index and return the vector of offsets.
668 * The offsets will be used to index into the constant buffer or
669 * temporary register file.
670 */
/*
 * \param reg_file      file of the register being indexed (for clamping)
 * \param reg_index     constant base index from the instruction
 * \param indirect_reg  the ADDR/TEMP register providing the offsets
 * \param index_limit   max valid index for reg_file (ignored for constants)
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg,
                   int index_limit)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   /* per-channel index = base + relative offset */
   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      assert(index_limit >= 0);
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type, index_limit);

      /* unsigned min clamps the index into the declared file range */
      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}
732
733 static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type stype)734 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
735 enum tgsi_opcode_type stype)
736 {
737 struct lp_build_context *bld_fetch;
738
739 switch (stype) {
740 case TGSI_TYPE_FLOAT:
741 case TGSI_TYPE_UNTYPED:
742 bld_fetch = &bld_base->base;
743 break;
744 case TGSI_TYPE_UNSIGNED:
745 bld_fetch = &bld_base->uint_bld;
746 break;
747 case TGSI_TYPE_SIGNED:
748 bld_fetch = &bld_base->int_bld;
749 break;
750 case TGSI_TYPE_DOUBLE:
751 bld_fetch = &bld_base->dbl_bld;
752 break;
753 case TGSI_TYPE_UNSIGNED64:
754 bld_fetch = &bld_base->uint64_bld;
755 break;
756 case TGSI_TYPE_SIGNED64:
757 bld_fetch = &bld_base->int64_bld;
758 break;
759 case TGSI_TYPE_VOID:
760 default:
761 assert(0);
762 bld_fetch = NULL;
763 break;
764 }
765 return bld_fetch;
766 }
767
768 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,unsigned chan_index,boolean need_perelement_offset)769 get_soa_array_offsets(struct lp_build_context *uint_bld,
770 LLVMValueRef indirect_index,
771 unsigned chan_index,
772 boolean need_perelement_offset)
773 {
774 struct gallivm_state *gallivm = uint_bld->gallivm;
775 LLVMValueRef chan_vec =
776 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
777 LLVMValueRef length_vec =
778 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
779 LLVMValueRef index_vec;
780
781 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
782 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
783 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
784 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
785
786 if (need_perelement_offset) {
787 LLVMValueRef pixel_offsets;
788 unsigned i;
789 /* build pixel offset vector: {0, 1, 2, 3, ...} */
790 pixel_offsets = uint_bld->undef;
791 for (i = 0; i < uint_bld->type.length; i++) {
792 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
793 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
794 ii, ii, "");
795 }
796 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
797 }
798 return index_vec;
799 }
800
/*
 * Fetch one channel of a constant-buffer source operand as a SoA vector.
 * Handles direct and indirect addressing (with out-of-bounds returning 0)
 * and 32-bit as well as 64-bit operand types; the two 16-bit halves of
 * swizzle_in select the low/high channels of a 64-bit value.
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;
   /* low 16 bits of swizzle_in = first channel selector */
   unsigned swizzle = swizzle_in & 0xffff;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         /* second index vector for the high channel of 64-bit values */
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");

      /* non-adjacent 64-bit channels: load both halves separately */
      if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {

         LLVMValueRef scalar2, scalar2_ptr;
         LLVMValueRef shuffles[2];
         index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));

         scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
                                    &index, 1, "");

         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
         shuffles[0] = lp_build_const_int32(gallivm, 0);
         shuffles[1] = lp_build_const_int32(gallivm, 1);

         res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
         res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
         res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
      } else {
         /* cast the pointer so one load pulls a whole 64-bit value */
         if (stype == TGSI_TYPE_DOUBLE) {
            LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
            bld_broad = &bld_base->dbl_bld;
         } else if (stype == TGSI_TYPE_UNSIGNED64) {
            LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
            bld_broad = &bld_base->uint64_bld;
         } else if (stype == TGSI_TYPE_SIGNED64) {
            LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
            bld_broad = &bld_base->int64_bld;
         }
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");
         res = lp_build_broadcast_scalar(bld_broad, scalar);
      }

   }

   /* reinterpret the float gather result as the requested operand type */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
919
920 /**
921 * Fetch 64-bit values from two separate channels.
922 * 64-bit values are stored split across two channels, like xy and zw.
923 * This function creates a set of vec_length*2 floats,
924 * extracts the values from the two channels,
925 * puts them in the correct place, then casts to vec_length 64-bits.
926 */
927 static LLVMValueRef
emit_fetch_64bit(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type stype,LLVMValueRef input,LLVMValueRef input2)928 emit_fetch_64bit(
929 struct lp_build_tgsi_context * bld_base,
930 enum tgsi_opcode_type stype,
931 LLVMValueRef input,
932 LLVMValueRef input2)
933 {
934 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
935 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
936 LLVMBuilderRef builder = gallivm->builder;
937 LLVMValueRef res;
938 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
939 int i;
940 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
941 int len = bld_base->base.type.length * 2;
942 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
943
944 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
945 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
946 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
947 }
948 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
949
950 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
951 }
952
/**
 * Fetch a value from the immediate register file.
 *
 * Immediates either live in per-channel SSA values (bld->immediates) or,
 * when indirect addressing is used or use_immediates_array is set, in a
 * memory-backed array (bld->imms_array).  For 64-bit types the second
 * 32-bit channel index is packed in the upper 16 bits of \p swizzle_in
 * and the two channels are merged by emit_fetch_64bit().
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;
   /* low 16 bits select the first (or only) 32-bit channel */
   unsigned swizzle = swizzle_in & 0xffff;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec; /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect,
                                             bld->bld_base.info->file_max[reg->Register.File]);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle_in >> 16,
                                               FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         /* Direct index: plain GEP + load at reg*4 + channel. */
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(gallivm, 0);
         gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, gep, 2, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            /* Load the second channel and combine into 64-bit lanes. */
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;
            gep[1] = lp_build_const_int32(gallivm,
                                          reg->Register.Index * 4 + (swizzle_in >> 16));
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, gep, 2, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      /* Immediates kept as SSA values, indexed directly. */
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
   }

   /* Values are stored as floats; bitcast to the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}
1032
/**
 * Fetch a value from the input register file.
 *
 * Inputs either live in per-channel SSA values (bld->inputs) or, when the
 * input file uses indirect addressing, in a memory-backed array
 * (bld->inputs_array).  For 64-bit types the second 32-bit channel index
 * is packed in the upper 16 bits of \p swizzle_in.
 */
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   /* low 16 bits select the first (or only) 32-bit channel */
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec; /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      /* Per-pixel offsets (TRUE) since inputs vary across the SoA vector. */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* Inputs were spilled to memory; direct GEP + load. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            /* Load the second channel and combine into 64-bit lanes. */
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + (swizzle_in >> 16));
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         /* Inputs kept as SSA values, indexed directly. */
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
      }
   }

   assert(res);

   /* Values are stored as floats; bitcast to the requested type. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1112
1113
/**
 * Fetch a geometry shader input.
 *
 * GS inputs are two-dimensional (vertex index x attribute index); the
 * actual reads go through the gs_iface->fetch_input() callback.
 * TGSI_SEMANTIC_PRIMID is special-cased as a system value.  For 64-bit
 * types the second 32-bit channel index is packed in the upper 16 bits
 * of \p swizzle_in.
 */
static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      /*
       * XXX: this is possibly not quite the right value, since file_max may be
       * larger than the max attrib index, due to it being the max of declared
       * inputs AND the max vertices per prim (which is 6 for tri adj).
       * It should however be safe to use (since we always allocate
       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
       */
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      /*
       * A fixed 6 should do as well (which is what we allocate).
       */
      int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        index_limit);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      /* Fetch the second 32-bit channel and merge into 64-bit lanes. */
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1201
/**
 * Fetch a tessellation control shader input (or its own output).
 *
 * Reads go through the tcs_iface callbacks; since a TCS may read back
 * values it has written, TGSI_FILE_OUTPUT registers are fetched via
 * emit_fetch_output() instead of emit_fetch_input().
 * TGSI_SEMANTIC_PRIMID is special-cased as a system value.  For 64-bit
 * types the second 32-bit channel index is packed in the upper 16 bits
 * of \p swizzle_in.
 */
static LLVMValueRef
emit_fetch_tcs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        PIPE_MAX_SHADER_INPUTS);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   // TCS can read from its own outputs
   if (reg->Register.File == TGSI_FILE_OUTPUT) {
      res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                              reg->Dimension.Indirect,
                                              vertex_index,
                                              reg->Register.Indirect,
                                              attrib_index,
                                              FALSE,
                                              swizzle_index,
                                              bld_base->info->output_semantic_name[reg->Register.Index]);
   } else {
      res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                             reg->Dimension.Indirect,
                                             vertex_index,
                                             reg->Register.Indirect,
                                             attrib_index,
                                             FALSE,
                                             swizzle_index);
   }


   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      /* Fetch the second 32-bit channel and merge into 64-bit lanes. */
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      if (reg->Register.File == TGSI_FILE_OUTPUT) {
         res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                                  reg->Dimension.Indirect,
                                                  vertex_index,
                                                  reg->Register.Indirect,
                                                  attrib_index,
                                                  FALSE,
                                                  swizzle_index,
                                                  bld_base->info->output_semantic_name[reg->Register.Index]);
      } else {
         res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                                 reg->Dimension.Indirect,
                                                 vertex_index,
                                                 reg->Register.Indirect,
                                                 attrib_index,
                                                 FALSE,
                                                 swizzle_index);
      }
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1304
/**
 * Fetch a tessellation evaluation shader input.
 *
 * Patch inputs (TGSI_SEMANTIC_PATCH) are read via fetch_patch_input();
 * per-vertex inputs via fetch_vertex_input().  TGSI_SEMANTIC_PRIMID is
 * special-cased as a system value.  For 64-bit types the second 32-bit
 * channel index is packed in the upper 16 bits of \p swizzle_in.
 */
static LLVMValueRef
emit_fetch_tes_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        PIPE_MAX_SHADER_INPUTS);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   /* Patch inputs have no vertex dimension; everything else is per-vertex. */
   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
      res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                              reg->Register.Indirect,
                                              attrib_index,
                                              swizzle_index);
   } else {
      res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                               reg->Dimension.Indirect,
                                               vertex_index,
                                               reg->Register.Indirect,
                                               attrib_index,
                                               FALSE,
                                               swizzle_index);
   }

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      /* Fetch the second 32-bit channel and merge into 64-bit lanes. */
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
         res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                                  reg->Register.Indirect,
                                                  attrib_index,
                                                  swizzle_index);
      }
      else {
         res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                                   reg->Dimension.Indirect,
                                                   vertex_index,
                                                   reg->Register.Indirect,
                                                   attrib_index,
                                                   FALSE,
                                                   swizzle_index);
      }
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1398
1399
1400
/**
 * Fetch a value from the temporary register file.
 *
 * Temporaries live in the memory-backed temps_array; indirect accesses
 * use a per-pixel gather, direct accesses a plain load.  For 64-bit
 * types the second 32-bit channel index is packed in the upper 16 bits
 * of \p swizzle_in.
 */
static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   /* low 16 bits select the first (or only) 32-bit channel */
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      /* Per-pixel offsets (TRUE) since temps vary across the SoA vector. */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         /* Load the second channel and combine into 64-bit lanes. */
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   /* Temporaries are stored as floats; bitcast to the requested type. */
   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1469
/**
 * Fetch a system value (instance id, vertex id, thread id, ...).
 *
 * Each semantic maps to a member of bld->system_values; scalar values
 * are broadcast to a full SoA vector where needed.  The value's actual
 * type (atype) is tracked per case and bitcast to the requested
 * \p stype at the end when they differ.  Indirect addressing is not
 * supported for system values.
 */
static LLVMValueRef
emit_fetch_system_value(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   enum tgsi_opcode_type atype; // Actual type of the value
   unsigned swizzle = swizzle_in & 0xffff;

   assert(!reg->Register.Indirect);

   switch (info->system_value_semantic_name[reg->Register.Index]) {
   case TGSI_SEMANTIC_INSTANCEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID:
      res = bld->system_values.vertex_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID_NOBASE:
      res = bld->system_values.vertex_id_nobase;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEVERTEX:
      res = bld->system_values.basevertex;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEINSTANCE:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_PRIMID:
      res = bld->system_values.prim_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_INVOCATIONID:
      /* TCS already has a per-channel vector; other stages hold a scalar. */
      if (info->processor == PIPE_SHADER_TESS_CTRL)
         res = bld->system_values.invocation_id;
      else
         res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_HELPER_INVOCATION:
      /* Helper invocations are the lanes NOT in the live execution mask. */
      res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_THREAD_ID:
      res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BLOCK_ID:
      /* block_id is a 3-component (x,y,z) vector; broadcast one element. */
      res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_GRID_SIZE:
      res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_TESSCOORD:
      {
         LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
         LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
         res = LLVMBuildLoad(builder, array_indexed, "tess_coord");
      }
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_FACE:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_DRAWID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_SAMPLEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_TESSOUTER:
      /* 4-component tess factor vector; broadcast the selected element. */
      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
                                       bld->system_values.tess_outer,
                                       lp_build_const_int32(gallivm, swizzle_in));
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_TESSINNER:
      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
                                       bld->system_values.tess_inner,
                                       lp_build_const_int32(gallivm, swizzle_in));
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_VERTICESIN:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   default:
      assert(!"unexpected semantic in emit_fetch_system_value");
      res = bld_base->base.zero;
      atype = TGSI_TYPE_FLOAT;
      break;
   }

   /* Bitcast to the requested type when it differs from the actual type. */
   if (atype != stype) {
      if (stype == TGSI_TYPE_FLOAT) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      } else if (stype == TGSI_TYPE_UNSIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
      } else if (stype == TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
      }
   }

   return res;
}
1608
1609 /**
1610 * Register fetch with derivatives.
1611 */
1612 static void
emit_fetch_deriv(struct lp_build_tgsi_soa_context * bld,LLVMValueRef src,LLVMValueRef * res,LLVMValueRef * ddx,LLVMValueRef * ddy)1613 emit_fetch_deriv(
1614 struct lp_build_tgsi_soa_context *bld,
1615 LLVMValueRef src,
1616 LLVMValueRef *res,
1617 LLVMValueRef *ddx,
1618 LLVMValueRef *ddy)
1619 {
1620 if (res)
1621 *res = src;
1622
1623 /* TODO: use interpolation coeffs for inputs */
1624
1625 if (ddx)
1626 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1627
1628 if (ddy)
1629 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1630 }
1631
1632 /**
1633 * store an array of vec-length 64-bit into two arrays of vec_length floats
1634 * i.e.
1635 * value is d0, d1, d2, d3 etc.
1636 * each 64-bit has high and low pieces x, y
1637 * so gets stored into the separate channels as:
1638 * chan_ptr = d0.x, d1.x, d2.x, d3.x
1639 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1640 */
1641 static void
emit_store_64bit_chan(struct lp_build_tgsi_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)1642 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1643 LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1644 LLVMValueRef value)
1645 {
1646 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1647 struct gallivm_state *gallivm = bld_base->base.gallivm;
1648 LLVMBuilderRef builder = gallivm->builder;
1649 struct lp_build_context *float_bld = &bld_base->base;
1650 unsigned i;
1651 LLVMValueRef temp, temp2;
1652 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1653 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1654
1655 for (i = 0; i < bld_base->base.type.length; i++) {
1656 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1657 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1658 }
1659
1660 temp = LLVMBuildShuffleVector(builder, value,
1661 LLVMGetUndef(LLVMTypeOf(value)),
1662 LLVMConstVector(shuffles,
1663 bld_base->base.type.length),
1664 "");
1665 temp2 = LLVMBuildShuffleVector(builder, value,
1666 LLVMGetUndef(LLVMTypeOf(value)),
1667 LLVMConstVector(shuffles2,
1668 bld_base->base.type.length),
1669 "");
1670
1671 lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1672 lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1673 }
1674
/**
 * Store a value into the output register file.
 *
 * Indirect destinations use a masked scatter over the memory-backed
 * outputs_array; direct destinations use a masked store.  64-bit values
 * are split across two consecutive channels (chan_index, chan_index+1)
 * via emit_store_64bit_chan().
 */
static void
emit_store_output(struct lp_build_tgsi_context *bld_base,
                  enum tgsi_opcode_type dtype,
                  const struct tgsi_full_dst_register *reg,
                  unsigned index,
                  unsigned chan_index,
                  LLVMValueRef indirect_index,
                  LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;

   /* Outputs are always stored as floats */
   value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

   if (reg->Register.Indirect) {
      LLVMValueRef index_vec; /* indexes into the output registers */
      LLVMValueRef outputs_array;
      LLVMTypeRef fptr_type;

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        chan_index,
                                        TRUE);

      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");

      /* Scatter store values into output registers */
      emit_mask_scatter(bld, outputs_array, index_vec, value,
                        &bld->exec_mask);
   }
   else {
      assert(LLVMTypeOf(value) == float_bld->vec_type);
      LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                               chan_index);

      if (tgsi_type_is_64bit(dtype)) {
         /* Split the 64-bit value across two adjacent channels. */
         LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
                                                   chan_index + 1);
         emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
                               value);
      } else
         lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
   }
}
1723
/**
 * Store a tessellation control shader output.
 *
 * TCS outputs are two-dimensional (vertex index x attribute index) and
 * are written through the tcs_iface->emit_store_output() callback,
 * passing the current execution mask so inactive lanes are preserved.
 */
static void
emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
                      enum tgsi_opcode_type dtype,
                      const struct tgsi_full_dst_register *reg,
                      unsigned index,
                      unsigned chan_index,
                      LLVMValueRef indirect_index,
                      LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   LLVMValueRef channel_index = NULL;

   if (reg->Register.Indirect) {
      /*
       * XXX: this is possibly not quite the right value, since file_max may be
       * larger than the max attrib index, due to it being the max of declared
       * inputs AND the max vertices per prim (which is 6 for tri adj).
       * It should however be safe to use (since we always allocate
       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
       */
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        PIPE_MAX_SHADER_OUTPUTS);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   channel_index = lp_build_const_int32(gallivm, chan_index);

   assert(bld->tcs_iface->emit_store_output);
   bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                     bld_base->info->output_semantic_name[reg->Register.Index],
                                     reg->Dimension.Indirect,
                                     vertex_index,
                                     reg->Register.Indirect,
                                     attrib_index,
                                     false,
                                     channel_index,
                                     value,
                                     mask_vec(bld_base));
}
1782
1783 static void
emit_store_temp(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type dtype,const struct tgsi_full_dst_register * reg,unsigned index,unsigned chan_index,LLVMValueRef indirect_index,LLVMValueRef value)1784 emit_store_temp(struct lp_build_tgsi_context *bld_base,
1785 enum tgsi_opcode_type dtype,
1786 const struct tgsi_full_dst_register *reg,
1787 unsigned index,
1788 unsigned chan_index,
1789 LLVMValueRef indirect_index,
1790 LLVMValueRef value)
1791 {
1792 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1793 struct gallivm_state *gallivm = bld_base->base.gallivm;
1794 LLVMBuilderRef builder = gallivm->builder;
1795 struct lp_build_context *float_bld = &bld_base->base;
1796
1797 /* Temporaries are always stored as floats */
1798 if (!tgsi_type_is_64bit(dtype))
1799 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1800 else
1801 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1802
1803 if (reg->Register.Indirect) {
1804 LLVMValueRef index_vec; /* indexes into the temp registers */
1805 LLVMValueRef temps_array;
1806 LLVMTypeRef fptr_type;
1807
1808 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1809 indirect_index,
1810 chan_index,
1811 TRUE);
1812
1813 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1814 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1815
1816 /* Scatter store values into temp registers */
1817 emit_mask_scatter(bld, temps_array, index_vec, value,
1818 &bld->exec_mask);
1819 }
1820 else {
1821 LLVMValueRef temp_ptr;
1822 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1823
1824 if (tgsi_type_is_64bit(dtype)) {
1825 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1826 reg->Register.Index,
1827 chan_index + 1);
1828 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1829 value);
1830 }
1831 else
1832 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1833 }
1834 }
1835
1836 static void
emit_store_address(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type dtype,const struct tgsi_full_dst_register * reg,unsigned index,unsigned chan_index,LLVMValueRef indirect_index,LLVMValueRef value)1837 emit_store_address(struct lp_build_tgsi_context *bld_base,
1838 enum tgsi_opcode_type dtype,
1839 const struct tgsi_full_dst_register *reg,
1840 unsigned index,
1841 unsigned chan_index,
1842 LLVMValueRef indirect_index,
1843 LLVMValueRef value)
1844 {
1845 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1846 struct gallivm_state *gallivm = bld_base->base.gallivm;
1847 LLVMBuilderRef builder = gallivm->builder;
1848 struct lp_build_context *int_bld = &bld_base->int_bld;
1849
1850 assert(dtype == TGSI_TYPE_SIGNED);
1851 assert(LLVMTypeOf(value) == int_bld->vec_type);
1852 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1853 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1854 bld->addr[reg->Register.Index][chan_index]);
1855 }
1856
/**
 * Register store.
 *
 * Stores one channel of a computed result to the instruction's destination
 * register.  Applies optional saturation (float only), resolves indirect
 * addressing into an index vector, optionally dumps the value for
 * debugging, and then dispatches to the per-register-file store callback.
 *
 * \param index       which destination operand of the instruction
 * \param chan_index  channel (x/y/z/w) being written
 * \param value       SoA vector value to store
 */
static void
emit_store_chan(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *float_bld = &bld_base->base;
   LLVMValueRef indirect_index = NULL;
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

   /*
    * Apply saturation.
    *
    * It is always assumed to be float.
    */
   if (inst->Instruction.Saturate) {
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      /* clamp to [0,1], with NaN mapped to zero */
      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
   }

   if (reg->Register.Indirect) {
      /*
       * Currently the mesa/st doesn't generate indirect stores
       * to 64-bit values, it normally uses MOV to do indirect stores.
       */
      assert(!tgsi_type_is_64bit(dtype));
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);
   } else {
      assert(reg->Register.Index <=
             bld_base->info->file_max[reg->Register.File]);
   }

   if (DEBUG_EXECUTION) {
      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
   }

   /* Dispatch to the register-file specific store implementation. */
   assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
   bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
                                                      dtype,
                                                      reg,
                                                      index,
                                                      chan_index,
                                                      indirect_index,
                                                      value);

   (void)dtype;
}
1919
1920 /*
1921 * Called at the beginning of the translation of each TGSI instruction, to
1922 * emit some debug code.
1923 */
1924 static void
emit_debug(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info)1925 emit_debug(
1926 struct lp_build_tgsi_context * bld_base,
1927 const struct tgsi_full_instruction * inst,
1928 const struct tgsi_opcode_info * info)
1929
1930 {
1931 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1932
1933 if (DEBUG_EXECUTION) {
1934 /*
1935 * Dump the TGSI instruction.
1936 */
1937
1938 struct gallivm_state *gallivm = bld_base->base.gallivm;
1939 char buf[512];
1940 buf[0] = '$';
1941 buf[1] = ' ';
1942 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1943 lp_build_printf(gallivm, buf);
1944
1945 /* Dump the execution mask.
1946 */
1947 if (bld->exec_mask.has_mask) {
1948 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1949 }
1950 }
1951 }
1952
1953 static void
emit_store(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,unsigned index,LLVMValueRef dst[4])1954 emit_store(
1955 struct lp_build_tgsi_context * bld_base,
1956 const struct tgsi_full_instruction * inst,
1957 const struct tgsi_opcode_info * info,
1958 unsigned index,
1959 LLVMValueRef dst[4])
1960
1961 {
1962 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1963
1964 unsigned writemask = inst->Dst[index].Register.WriteMask;
1965 while (writemask) {
1966 unsigned chan_index = u_bit_scan(&writemask);
1967 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1968 continue;
1969 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1970 }
1971 }
1972
/**
 * Map a TGSI_TEXTURE_* target to the corresponding PIPE_TEXTURE_*/
 * PIPE_BUFFER target.  Shadow and MSAA variants collapse onto the same
 * pipe target as their plain counterparts.
 */
static unsigned
tgsi_to_pipe_tex_target(unsigned tgsi_target)
{
   switch (tgsi_target) {
   case TGSI_TEXTURE_BUFFER:
      return PIPE_BUFFER;
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_SHADOW1D:
      return PIPE_TEXTURE_1D;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_2D_MSAA:
      return PIPE_TEXTURE_2D;
   case TGSI_TEXTURE_3D:
      return PIPE_TEXTURE_3D;
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_SHADOWCUBE:
      return PIPE_TEXTURE_CUBE;
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      return PIPE_TEXTURE_RECT;
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      return PIPE_TEXTURE_1D_ARRAY;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      return PIPE_TEXTURE_2D_ARRAY;
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      return PIPE_TEXTURE_CUBE_ARRAY;
   default:
      /* unknown target: should not be reachable for valid TGSI */
      assert(0);
      return PIPE_BUFFER;
   }
}
2009
2010
2011 static enum lp_sampler_lod_property
lp_build_lod_property(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,unsigned src_op)2012 lp_build_lod_property(
2013 struct lp_build_tgsi_context *bld_base,
2014 const struct tgsi_full_instruction *inst,
2015 unsigned src_op)
2016 {
2017 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2018 enum lp_sampler_lod_property lod_property;
2019
2020 /*
2021 * Not much we can do here. We could try catching inputs declared
2022 * with constant interpolation but not sure it's worth it - since for
2023 * TEX opcodes as well as FETCH/LD the lod comes from same reg as
2024 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
2025 * like the constant/immediate recognition below.
2026 * What seems to be of more value would be to recognize temps holding
2027 * broadcasted scalars but no way we can do it.
2028 * Tried asking llvm but without any success (using LLVMIsConstant
2029 * even though this isn't exactly what we'd need), even as simple as
2030 * IMM[0] UINT32 (0,-1,0,0)
2031 * MOV TEMP[0] IMM[0].yyyy
2032 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2033 * doesn't work.
2034 * This means there's ZERO chance this will ever catch a scalar lod
2035 * with traditional tex opcodes as well as texel fetches, since the lod
2036 * comes from the same reg as coords (except some test shaders using
2037 * constant coords maybe).
2038 * There's at least hope for sample opcodes as well as size queries.
2039 */
2040 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2041 reg->Register.File == TGSI_FILE_CONSTANT ||
2042 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2043 lod_property = LP_SAMPLER_LOD_SCALAR;
2044 }
2045 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2046 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2047 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2048 }
2049 else {
2050 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2051 }
2052 }
2053 else {
2054 /* never use scalar (per-quad) lod the results are just too wrong. */
2055 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2056 }
2057 return lod_property;
2058 }
2059
2060
2061 /**
2062 * High-level instruction translators.
2063 */
2064
/**
 * Emit code for a traditional TGSI texture sampling opcode
 * (TEX/TXB/TXL/TXD/TXP and the gather variants), where the sampler unit
 * and the coordinates share the instruction's source registers.
 *
 * Collects coordinates, optional lod/bias, projection divide, explicit
 * derivatives and texel offsets from the instruction, assembles the
 * sample_key and lp_sampler_params, and hands everything to the sampler
 * code generator.
 *
 * \param modifier     lod bias / explicit lod / projected / derivatives
 * \param texel        receives the 4 resulting texel channel vectors
 * \param sampler_reg  which src register holds the sampler unit
 * \param sampler_op   texture vs. gather vs. lodq operation
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel,
          unsigned sampler_reg,
          enum lp_sampler_op_type sampler_op)
{
   unsigned unit = inst->Src[sampler_reg].Register.Index;
   LLVMValueRef oow = NULL;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_derivs, num_offsets, i;
   unsigned shadow_coord = 0;
   unsigned layer_coord = 0;
   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      /* No sampler generator bound: return undef texels rather than crash. */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /* Determine coordinate/derivative/offset counts and which coordinate
    * slots (if any) carry the layer and shadow comparison values. */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      FALLTHROUGH;
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      FALLTHROUGH;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      layer_coord = 1;
      FALLTHROUGH;
   case TGSI_TEXTURE_SHADOW1D:
      shadow_coord = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      layer_coord = 2;
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      shadow_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      shadow_coord = 4; /* shadow coord special different reg */
      break;
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   default:
      /* MSAA targets are only reachable via TXF, not via TEX-style ops. */
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
         lod = bld->bld_base.base.zero;
      } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
                 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
         /* note that shadow cube array with bias/explicit lod does not exist */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         /* lod/bias lives in the w channel of the coordinate register */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   if (sampler_op == LP_SAMPLER_OP_GATHER) {
      /* The sampler src swizzle selects which component TG4 gathers. */
      uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
      sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
   }
   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      /* TXP: divide coords (and layer/shadow) by w via reciprocal multiply. */
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3) {
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      else {
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   }
   /* Shadow coord occupies always 5th slot. */
   if (shadow_coord) {
      sample_key |= LP_SAMPLER_SHADOW;
      if (shadow_coord == 4) {
         /* shadow cube array: comparison value comes from src1 */
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* we don't handle the 4 offset version of tg4 */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   /* TEX-style ops use the same unit for texture and sampler. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
2277
/**
 * Emit code for a D3D10-style SAMPLE* opcode, where texture and sampler
 * units come from separate source operands (src1 and src2) and the
 * texture target comes from the declared sampler view rather than the
 * instruction itself.
 *
 * \param modifier     lod bias / explicit lod / lod zero / derivatives
 * \param compare      true for SAMPLE_C / SAMPLE_C_LZ (shadow compare)
 * \param sample_type  sampling operation type for the sample key
 * \param texel        receives the 4 resulting texel channel vectors
 */
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            enum lp_sampler_op_type sample_type,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   unsigned num_offsets, num_derivs, i;
   unsigned layer_coord = 0;
   unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      /* No sampler generator bound: return undef texels rather than crash. */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * unlike old-style tex opcodes the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets,
    * however the target information is NOT there and comes from the
    * declared sampler views instead.
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      layer_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* Lod/bias (if any) always comes from src3.x for sample opcodes. */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      /* XXX might be better to explicitly pass the level zero information */
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3)
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      else
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   }
   /* Shadow coord occupies always 5th slot. */
   if (compare) {
      sample_key |= LP_SAMPLER_SHADOW;
      /* comparison value comes from src3.x */
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = texture_unit;
   params.sampler_index = sampler_unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* Apply the sampler view's return swizzle (from src1) if non-identity. */
   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
2459
/**
 * Emit code for a texel fetch (TXF / TXF_LZ / SAMPLE_I / SAMPLE_I_MS):
 * direct integer-addressed texel read, no filtering.
 *
 * \param texel       receives the 4 resulting texel channel vectors
 * \param is_samplei  true for SAMPLE_I*, where the target comes from the
 *                    declared sampler view and a return swizzle applies;
 *                    false for TXF, where the target is on the instruction
 */
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef ms_index = NULL;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;
   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      /* No sampler generator bound: return undef texels rather than crash. */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   /* Determine coordinate count and layer coordinate slot. */
   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_2D_MSAA:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      /* shadow/cube targets are not valid for texel fetches */
      assert(0);
      return;
   }

   /* always have lod except for buffers and msaa targets ? */
   if (target != TGSI_TEXTURE_BUFFER &&
       target != TGSI_TEXTURE_2D_MSAA &&
       target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
       inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      /* lod comes from the w channel of the coordinate register */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   if (target == TGSI_TEXTURE_2D_MSAA ||
       target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
      sample_key |= LP_SAMPLER_FETCH_MS;
      ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
   }

   /*
    * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
    * would be the sample index.
    */

   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
   for (i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   /*
    * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
    * and trigger some assertions with d3d10 where the sampler view number
    * can exceed this.
    */
   params.sampler_index = 0;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.derivs = NULL;
   params.lod = explicit_lod;
   params.texel = texel;
   params.ms_index = ms_index;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* Apply the sampler view's return swizzle (SAMPLE_I only). */
   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
2600
/**
 * Emit code for a texture size query (TXQ / SVIEWINFO / RESQ-style).
 *
 * Fetches an optional explicit lod (targets without mip levels — buffers
 * and rect textures — take none) and dispatches to the sampler
 * generator's size-query hook.
 *
 * \param sizes_out     receives up to 4 integer size vectors
 * \param is_sviewinfo  true when the target comes from the declared
 *                      sampler view (SVIEWINFO) rather than the
 *                      instruction's texture field (TXQ)
 */
static void
emit_size_query( struct lp_build_tgsi_soa_context *bld,
                 const struct tgsi_full_instruction *inst,
                 LLVMValueRef *sizes_out,
                 boolean is_sviewinfo)
{
   LLVMValueRef explicit_lod;
   enum lp_sampler_lod_property lod_property;
   unsigned has_lod;
   unsigned i;
   unsigned unit = inst->Src[1].Register.Index;
   unsigned target, pipe_target;
   struct lp_sampler_size_query_params params;

   if (is_sviewinfo) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }
   /* Buffers and rect textures have no mip levels, hence no lod operand. */
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      has_lod = 0;
      break;
   default:
      has_lod = 1;
      break;
   }

   if (!bld->sampler) {
      /* No sampler generator bound: return undef sizes rather than crash. */
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++)
         sizes_out[i] = bld->bld_base.int_bld.undef;
      return;
   }

   if (has_lod) {
      /* lod is in the x channel of src0 */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else {
      explicit_lod = NULL;
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }


   pipe_target = tgsi_to_pipe_tex_target(target);

   params.int_type = bld->bld_base.int_bld.type;
   params.texture_unit = unit;
   params.target = pipe_target;
   params.context_ptr = bld->context_ptr;
   /* NOTE(review): always TRUE even when is_sviewinfo is false — looks
    * intentional (TXQ uses the same result layout here), but confirm
    * against the size-query implementation. */
   params.is_sviewinfo = TRUE;
   params.lod_property = lod_property;
   params.explicit_lod = explicit_lod;
   params.sizes_out = sizes_out;
   params.samples_only = false;

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
2665
2666 static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context * bld,int pc)2667 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2668 int pc)
2669 {
2670 unsigned i;
2671
2672 for (i = 0; i < 5; i++) {
2673 enum tgsi_opcode opcode;
2674
2675 if (pc + i >= bld->bld_base.info->num_instructions)
2676 return TRUE;
2677
2678 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2679
2680 if (opcode == TGSI_OPCODE_END)
2681 return TRUE;
2682
2683 if (opcode == TGSI_OPCODE_TEX ||
2684 opcode == TGSI_OPCODE_TXP ||
2685 opcode == TGSI_OPCODE_TXD ||
2686 opcode == TGSI_OPCODE_TXB ||
2687 opcode == TGSI_OPCODE_TXL ||
2688 opcode == TGSI_OPCODE_TXF ||
2689 opcode == TGSI_OPCODE_TXQ ||
2690 opcode == TGSI_OPCODE_TEX2 ||
2691 opcode == TGSI_OPCODE_TXB2 ||
2692 opcode == TGSI_OPCODE_TXL2 ||
2693 opcode == TGSI_OPCODE_SAMPLE ||
2694 opcode == TGSI_OPCODE_SAMPLE_B ||
2695 opcode == TGSI_OPCODE_SAMPLE_C ||
2696 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2697 opcode == TGSI_OPCODE_SAMPLE_D ||
2698 opcode == TGSI_OPCODE_SAMPLE_I ||
2699 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2700 opcode == TGSI_OPCODE_SAMPLE_L ||
2701 opcode == TGSI_OPCODE_SVIEWINFO ||
2702 opcode == TGSI_OPCODE_CAL ||
2703 opcode == TGSI_OPCODE_IF ||
2704 opcode == TGSI_OPCODE_UIF ||
2705 opcode == TGSI_OPCODE_BGNLOOP ||
2706 opcode == TGSI_OPCODE_SWITCH)
2707 return FALSE;
2708 }
2709
2710 return TRUE;
2711 }
2712
2713
2714
2715 /**
2716 * Kill fragment if any of the src register values are negative.
2717 */
2718 static void
emit_kill_if(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,int pc)2719 emit_kill_if(
2720 struct lp_build_tgsi_soa_context *bld,
2721 const struct tgsi_full_instruction *inst,
2722 int pc)
2723 {
2724 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2725 const struct tgsi_full_src_register *reg = &inst->Src[0];
2726 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2727 LLVMValueRef mask;
2728 unsigned chan_index;
2729
2730 memset(&terms, 0, sizeof terms);
2731
2732 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2733 unsigned swizzle;
2734
2735 /* Unswizzle channel */
2736 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2737
2738 /* Check if the component has not been already tested. */
2739 assert(swizzle < TGSI_NUM_CHANNELS);
2740 if( !terms[swizzle] )
2741 /* TODO: change the comparison operator instead of setting the sign */
2742 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2743 }
2744
2745 mask = NULL;
2746 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2747 if(terms[chan_index]) {
2748 LLVMValueRef chan_mask;
2749
2750 /*
2751 * If term < 0 then mask = 0 else mask = ~0.
2752 */
2753 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2754
2755 if(mask)
2756 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2757 else
2758 mask = chan_mask;
2759 }
2760 }
2761
2762 if (bld->exec_mask.has_mask) {
2763 LLVMValueRef invmask;
2764 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2765 mask = LLVMBuildOr(builder, mask, invmask, "");
2766 }
2767
2768 lp_build_mask_update(bld->mask, mask);
2769 if (!near_end_of_shader(bld, pc))
2770 lp_build_mask_check(bld->mask);
2771 }
2772
2773
2774 /**
2775 * Unconditional fragment kill.
2776 * The only predication is the execution mask which will apply if
2777 * we're inside a loop or conditional.
2778 */
2779 static void
emit_kill(struct lp_build_tgsi_soa_context * bld,int pc)2780 emit_kill(struct lp_build_tgsi_soa_context *bld,
2781 int pc)
2782 {
2783 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2784 LLVMValueRef mask;
2785
2786 /* For those channels which are "alive", disable fragment shader
2787 * execution.
2788 */
2789 if (bld->exec_mask.has_mask) {
2790 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2791 }
2792 else {
2793 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2794 mask = zero;
2795 }
2796
2797 lp_build_mask_update(bld->mask, mask);
2798
2799 if (!near_end_of_shader(bld, pc))
2800 lp_build_mask_check(bld->mask);
2801 }
2802
2803
2804 /**
2805 * Emit code which will dump the value of all the temporary registers
2806 * to stdout.
2807 */
2808 static void
emit_dump_file(struct lp_build_tgsi_soa_context * bld,unsigned file)2809 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2810 unsigned file)
2811 {
2812 const struct tgsi_shader_info *info = bld->bld_base.info;
2813 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2814 LLVMBuilderRef builder = gallivm->builder;
2815 LLVMValueRef reg_ptr;
2816 int index;
2817 int max_index = info->file_max[file];
2818
2819 /*
2820 * Some register files, particularly constants, can be very large,
2821 * and dumping everything could make this unusably slow.
2822 */
2823 max_index = MIN2(max_index, 32);
2824
2825 for (index = 0; index <= max_index; index++) {
2826 LLVMValueRef res;
2827 unsigned mask;
2828 int chan;
2829
2830 if (index < 8 * sizeof(unsigned) &&
2831 (info->file_mask[file] & (1u << index)) == 0) {
2832 /* This was not declared.*/
2833 continue;
2834 }
2835
2836 if (file == TGSI_FILE_INPUT) {
2837 mask = info->input_usage_mask[index];
2838 } else {
2839 mask = TGSI_WRITEMASK_XYZW;
2840 }
2841
2842 for (chan = 0; chan < 4; chan++) {
2843 if ((mask & (1 << chan)) == 0) {
2844 /* This channel is not used.*/
2845 continue;
2846 }
2847
2848 if (file == TGSI_FILE_CONSTANT) {
2849 struct tgsi_full_src_register reg;
2850 memset(®, 0, sizeof reg);
2851 reg.Register.File = file;
2852 reg.Register.Index = index;
2853 reg.Register.SwizzleX = 0;
2854 reg.Register.SwizzleY = 1;
2855 reg.Register.SwizzleZ = 2;
2856 reg.Register.SwizzleW = 3;
2857
2858 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, ®, TGSI_TYPE_FLOAT, chan);
2859 if (!res) {
2860 continue;
2861 }
2862 } else if (file == TGSI_FILE_INPUT) {
2863 res = bld->inputs[index][chan];
2864 if (!res) {
2865 continue;
2866 }
2867 } else if (file == TGSI_FILE_TEMPORARY) {
2868 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2869 assert(reg_ptr);
2870 res = LLVMBuildLoad(builder, reg_ptr, "");
2871 } else if (file == TGSI_FILE_OUTPUT) {
2872 reg_ptr = lp_get_output_ptr(bld, index, chan);
2873 assert(reg_ptr);
2874 res = LLVMBuildLoad(builder, reg_ptr, "");
2875 } else {
2876 assert(0);
2877 continue;
2878 }
2879
2880 emit_dump_reg(gallivm, file, index, chan, res);
2881 }
2882 }
2883 }
2884
2885
2886
/**
 * Handle a TGSI declaration: allocate per-register storage (allocas) or
 * cache resource pointers, depending on the register file.
 *
 * Register files that support indirect addressing are backed by arrays
 * allocated elsewhere, so nothing is done for them here (hence the
 * indirect_files checks).
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      /* One alloca per temp channel, unless temps are indirectly
       * addressed (then they live in a single array elsewhere). */
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
      {
         /*
          * We could trivially fetch the per-buffer pointer when fetching the
          * constant, relying on llvm to figure out it's always the same pointer
          * anyway. However, doing so results in a huge (more than factor of 10)
          * slowdown in llvm compilation times for some (but not all) shaders
          * (more specifically, the IR optimization spends way more time in
          * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
          */
         unsigned idx2D = decl->Dim.Index2D;
         LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
         assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
         /* Cache per-buffer data pointer and size once per declaration. */
         bld->consts[idx2D] =
            lp_build_array_get(gallivm, bld->consts_ptr, index2D);
         bld->consts_sizes[idx2D] =
            lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
      }
      break;
   case TGSI_FILE_BUFFER:
      {
         /* Cache the SSBO base pointer and its size for this slot.
          * NOTE: this idx shadows the outer idx and uses only Range.First;
          * presumably buffer declarations cover a single slot each. */
         unsigned idx = decl->Range.First;
         LLVMValueRef index = lp_build_const_int32(gallivm, idx);
         assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
         bld->ssbos[idx] =
            lp_build_array_get(gallivm, bld->ssbo_ptr, index);
         bld->ssbo_sizes[idx] =
            lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);

      }
      break;
   case TGSI_FILE_MEMORY:
      break;
   default:
      /* don't need to declare other vars */
      break;
   }
}
2985
2986
lp_emit_immediate_soa(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_immediate * imm)2987 void lp_emit_immediate_soa(
2988 struct lp_build_tgsi_context *bld_base,
2989 const struct tgsi_full_immediate *imm)
2990 {
2991 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2992 struct gallivm_state * gallivm = bld_base->base.gallivm;
2993 LLVMValueRef imms[4];
2994 unsigned i;
2995 const uint size = imm->Immediate.NrTokens - 1;
2996 assert(size <= 4);
2997 switch (imm->Immediate.DataType) {
2998 case TGSI_IMM_FLOAT32:
2999 for( i = 0; i < size; ++i )
3000 imms[i] =
3001 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3002
3003 break;
3004 case TGSI_IMM_FLOAT64:
3005 case TGSI_IMM_UINT64:
3006 case TGSI_IMM_INT64:
3007 case TGSI_IMM_UINT32:
3008 for( i = 0; i < size; ++i ) {
3009 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3010 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3011 }
3012
3013 break;
3014 case TGSI_IMM_INT32:
3015 for( i = 0; i < size; ++i ) {
3016 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3017 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3018 }
3019
3020 break;
3021 }
3022 for( i = size; i < 4; ++i )
3023 imms[i] = bld_base->base.undef;
3024
3025 if (bld->use_immediates_array) {
3026 unsigned index = bld->num_immediates;
3027 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3028 LLVMBuilderRef builder = gallivm->builder;
3029 LLVMValueRef gep[2];
3030 gep[0] = lp_build_const_int32(gallivm, 0);
3031
3032 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3033 for (i = 0; i < 4; ++i ) {
3034 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3035 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3036 bld->imms_array, gep, 2, "");
3037 LLVMBuildStore(builder, imms[i], imm_ptr);
3038 }
3039 } else {
3040 /* simply copy the immediate values into the next immediates[] slot */
3041 unsigned i;
3042 assert(imm->Immediate.NrTokens - 1 <= 4);
3043 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3044
3045 for(i = 0; i < 4; ++i )
3046 bld->immediates[bld->num_immediates][i] = imms[i];
3047
3048 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3049 unsigned index = bld->num_immediates;
3050 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3051 LLVMBuilderRef builder = gallivm->builder;
3052 LLVMValueRef gep[2];
3053 gep[0] = lp_build_const_int32(gallivm, 0);
3054 for (i = 0; i < 4; ++i ) {
3055 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3056 LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
3057 bld->imms_array, gep, 2, "");
3058 LLVMBuildStore(builder,
3059 bld->immediates[index][i],
3060 imm_ptr);
3061 }
3062 }
3063 }
3064
3065 bld->num_immediates++;
3066 }
3067
3068 static void
ddx_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3069 ddx_emit(
3070 const struct lp_build_tgsi_action * action,
3071 struct lp_build_tgsi_context * bld_base,
3072 struct lp_build_emit_data * emit_data)
3073 {
3074 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3075
3076 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3077 &emit_data->output[emit_data->chan], NULL);
3078 }
3079
3080 static void
ddy_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3081 ddy_emit(
3082 const struct lp_build_tgsi_action * action,
3083 struct lp_build_tgsi_context * bld_base,
3084 struct lp_build_emit_data * emit_data)
3085 {
3086 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3087
3088 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3089 &emit_data->output[emit_data->chan]);
3090 }
3091
3092 static void
kill_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3093 kill_emit(
3094 const struct lp_build_tgsi_action * action,
3095 struct lp_build_tgsi_context * bld_base,
3096 struct lp_build_emit_data * emit_data)
3097 {
3098 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3099
3100 emit_kill(bld, bld_base->pc - 1);
3101 }
3102
3103 static void
kill_if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3104 kill_if_emit(
3105 const struct lp_build_tgsi_action * action,
3106 struct lp_build_tgsi_context * bld_base,
3107 struct lp_build_emit_data * emit_data)
3108 {
3109 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3110
3111 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3112 }
3113
3114 static void
tex_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3115 tex_emit(
3116 const struct lp_build_tgsi_action * action,
3117 struct lp_build_tgsi_context * bld_base,
3118 struct lp_build_emit_data * emit_data)
3119 {
3120 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3121
3122 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3123 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3124 }
3125
3126 static void
tex2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3127 tex2_emit(
3128 const struct lp_build_tgsi_action * action,
3129 struct lp_build_tgsi_context * bld_base,
3130 struct lp_build_emit_data * emit_data)
3131 {
3132 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3133
3134 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3135 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3136 }
3137
3138 static void
txb_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3139 txb_emit(
3140 const struct lp_build_tgsi_action * action,
3141 struct lp_build_tgsi_context * bld_base,
3142 struct lp_build_emit_data * emit_data)
3143 {
3144 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3145
3146 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3147 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3148 }
3149
3150 static void
txb2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3151 txb2_emit(
3152 const struct lp_build_tgsi_action * action,
3153 struct lp_build_tgsi_context * bld_base,
3154 struct lp_build_emit_data * emit_data)
3155 {
3156 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3157
3158 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3159 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3160 }
3161
3162 static void
txd_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3163 txd_emit(
3164 const struct lp_build_tgsi_action * action,
3165 struct lp_build_tgsi_context * bld_base,
3166 struct lp_build_emit_data * emit_data)
3167 {
3168 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3169
3170 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3171 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3172 }
3173
3174 static void
txl_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3175 txl_emit(
3176 const struct lp_build_tgsi_action * action,
3177 struct lp_build_tgsi_context * bld_base,
3178 struct lp_build_emit_data * emit_data)
3179 {
3180 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3181
3182 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3183 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3184 }
3185
3186 static void
txl2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3187 txl2_emit(
3188 const struct lp_build_tgsi_action * action,
3189 struct lp_build_tgsi_context * bld_base,
3190 struct lp_build_emit_data * emit_data)
3191 {
3192 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3193
3194 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3195 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3196 }
3197
3198 static void
txp_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3199 txp_emit(
3200 const struct lp_build_tgsi_action * action,
3201 struct lp_build_tgsi_context * bld_base,
3202 struct lp_build_emit_data * emit_data)
3203 {
3204 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3205
3206 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3207 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3208 }
3209
3210 static void
tg4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3211 tg4_emit(
3212 const struct lp_build_tgsi_action * action,
3213 struct lp_build_tgsi_context * bld_base,
3214 struct lp_build_emit_data * emit_data)
3215 {
3216 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3217
3218 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3219 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3220 }
3221
3222 static void
lodq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3223 lodq_emit(
3224 const struct lp_build_tgsi_action * action,
3225 struct lp_build_tgsi_context * bld_base,
3226 struct lp_build_emit_data * emit_data)
3227 {
3228 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3229
3230 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3231 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3232 }
3233
3234 static void
txq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3235 txq_emit(
3236 const struct lp_build_tgsi_action * action,
3237 struct lp_build_tgsi_context * bld_base,
3238 struct lp_build_emit_data * emit_data)
3239 {
3240 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3241
3242 emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3243 }
3244
3245 static void
txf_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3246 txf_emit(
3247 const struct lp_build_tgsi_action * action,
3248 struct lp_build_tgsi_context * bld_base,
3249 struct lp_build_emit_data * emit_data)
3250 {
3251 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3252
3253 emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3254 }
3255
3256 static void
sample_i_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3257 sample_i_emit(
3258 const struct lp_build_tgsi_action * action,
3259 struct lp_build_tgsi_context * bld_base,
3260 struct lp_build_emit_data * emit_data)
3261 {
3262 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3263
3264 emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3265 }
3266
3267 static void
sample_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3268 sample_emit(
3269 const struct lp_build_tgsi_action * action,
3270 struct lp_build_tgsi_context * bld_base,
3271 struct lp_build_emit_data * emit_data)
3272 {
3273 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3274
3275 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3276 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3277 }
3278
3279 static void
sample_b_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3280 sample_b_emit(
3281 const struct lp_build_tgsi_action * action,
3282 struct lp_build_tgsi_context * bld_base,
3283 struct lp_build_emit_data * emit_data)
3284 {
3285 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3286
3287 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3288 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3289 }
3290
3291 static void
sample_c_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3292 sample_c_emit(
3293 const struct lp_build_tgsi_action * action,
3294 struct lp_build_tgsi_context * bld_base,
3295 struct lp_build_emit_data * emit_data)
3296 {
3297 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3298
3299 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3300 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3301 }
3302
3303 static void
sample_c_lz_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3304 sample_c_lz_emit(
3305 const struct lp_build_tgsi_action * action,
3306 struct lp_build_tgsi_context * bld_base,
3307 struct lp_build_emit_data * emit_data)
3308 {
3309 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3310
3311 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3312 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3313 }
3314
3315 static void
sample_d_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3316 sample_d_emit(
3317 const struct lp_build_tgsi_action * action,
3318 struct lp_build_tgsi_context * bld_base,
3319 struct lp_build_emit_data * emit_data)
3320 {
3321 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3322
3323 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3324 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3325 }
3326
3327 static void
sample_l_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3328 sample_l_emit(
3329 const struct lp_build_tgsi_action * action,
3330 struct lp_build_tgsi_context * bld_base,
3331 struct lp_build_emit_data * emit_data)
3332 {
3333 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3334
3335 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3336 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3337 }
3338
3339 static void
gather4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3340 gather4_emit(
3341 const struct lp_build_tgsi_action * action,
3342 struct lp_build_tgsi_context * bld_base,
3343 struct lp_build_emit_data * emit_data)
3344 {
3345 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3346
3347 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3348 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
3349 }
3350
3351 static void
sviewinfo_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3352 sviewinfo_emit(
3353 const struct lp_build_tgsi_action * action,
3354 struct lp_build_tgsi_context * bld_base,
3355 struct lp_build_emit_data * emit_data)
3356 {
3357 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3358
3359 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3360 }
3361
3362 static void
lod_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3363 lod_emit(
3364 const struct lp_build_tgsi_action * action,
3365 struct lp_build_tgsi_context * bld_base,
3366 struct lp_build_emit_data * emit_data)
3367 {
3368 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3369
3370 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3371 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
3372 }
3373
target_to_dims_layer(unsigned target,unsigned * dims,unsigned * layer_coord)3374 static void target_to_dims_layer(unsigned target,
3375 unsigned *dims,
3376 unsigned *layer_coord)
3377 {
3378 *layer_coord = 0;
3379 switch (target) {
3380 case TGSI_TEXTURE_1D:
3381 case TGSI_TEXTURE_BUFFER:
3382 *dims = 1;
3383 break;
3384 case TGSI_TEXTURE_1D_ARRAY:
3385 *layer_coord = 1;
3386 *dims = 1;
3387 break;
3388 case TGSI_TEXTURE_2D:
3389 case TGSI_TEXTURE_RECT:
3390 *dims = 2;
3391 break;
3392 case TGSI_TEXTURE_2D_ARRAY:
3393 *layer_coord = 2;
3394 *dims = 2;
3395 break;
3396 case TGSI_TEXTURE_3D:
3397 case TGSI_TEXTURE_CUBE:
3398 case TGSI_TEXTURE_CUBE_ARRAY:
3399 *dims = 3;
3400 break;
3401 default:
3402 assert(0);
3403 *dims = 0;
3404 return;
3405 }
3406 }
3407
/**
 * Emit an image LOAD: fetch the texel addressed by the instruction's
 * coordinate operands (src 1) from image src 0 and write the result to
 * emit_data->output via the image code generator.
 */
static void
img_load_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct lp_img_params params;
   LLVMValueRef coords[5];
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   unsigned dims;
   unsigned target = emit_data->inst->Memory.Texture;
   unsigned layer_coord;

   target_to_dims_layer(target, &dims, &layer_coord);

   /* Fetch the used coordinates from src 1; pad the rest with undef. */
   for (unsigned i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
   }
   for (unsigned i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   /* The array layer (channel layer_coord of src 1) always goes in
    * coords[2], regardless of the dimension count. */
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);

   memset(&params, 0, sizeof(params));

   params.type = bld->bld_base.base.type;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.outdata = emit_data->output;
   params.target = tgsi_to_pipe_tex_target(target);
   params.image_index = emit_data->inst->Src[0].Register.Index;
   params.img_op = LP_IMG_LOAD;
   bld->image->emit_op(bld->image,
                       bld->bld_base.base.gallivm,
                       &params);
}
3447
/**
 * Emit a TGSI LOAD.
 *
 * Dispatches on the resource file of src 0:
 *  - IMAGE:    forwarded to img_load_emit().
 *  - CONSTBUF: vector gather from the cached constant buffer pointer,
 *              with out-of-bounds lanes masked via an overflow mask.
 *  - BUFFER / MEMORY (SSBO or shared memory): per-lane scalar loads,
 *    guarded by the execution mask (and an SSBO size check), with dead
 *    or out-of-bounds lanes producing zero.
 *
 * The byte offset in src 1 is converted to an element index by shifting
 * (>> 4 for constbufs: vec4 units; >> 2 for buffers: dword units).
 */
static void
load_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
   unsigned buf = bufreg->Register.Index;
   assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
          bufreg->Register.File == TGSI_FILE_IMAGE ||
          bufreg->Register.File == TGSI_FILE_MEMORY ||
          bufreg->Register.File == TGSI_FILE_CONSTBUF);
   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      img_load_emit(action, bld_base, emit_data);
   } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
      LLVMValueRef consts_ptr = bld->consts[buf];
      LLVMValueRef num_consts = bld->consts_sizes[buf];

      LLVMValueRef indirect_index;
      LLVMValueRef overflow_mask;

      /* Byte offset -> vec4 index. */
      indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
      indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);

      /* Gather values from the constant buffer */
      unsigned chan_index;
      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         /* Construct a boolean vector telling us which channels
          * overflow the bound constant buffer */
         overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                          indirect_index, num_consts);

         /* index_vec = indirect_index * 4 */
         LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec,
                                  lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
      }
   } else if (0) {
      /* for indirect support with ARB_gpu_shader5 */
   } else {
      LLVMValueRef index;
      LLVMValueRef scalar, scalar_ptr;
      unsigned chan_index;

      /* Byte offset -> dword index. */
      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
      index = lp_build_shr_imm(uint_bld, index, 2);

      scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];

      LLVMValueRef ssbo_limit = NULL;

      if (!is_shared) {
         /* SSBO size in bytes -> dword count, broadcast for the
          * per-lane bounds check below. */
         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      }

      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         /* Lanes must be active AND (for SSBOs) within bounds. */
         LLVMValueRef exec_mask = mask_vec(bld_base);
         if (!is_shared) {
            LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
            exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
         }

         /* Loop over the vector lanes, loading one scalar per active
          * lane; masked-out lanes are filled with zero. */
         LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
         struct lp_build_loop_state loop_state;
         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

         struct lp_build_if_state ifthen;
         LLVMValueRef cond, temp_res;

         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                              loop_state.counter, "");

         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");

         lp_build_if(&ifthen, gallivm, cond);
         scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);

         temp_res = LLVMBuildLoad(builder, result, "");
         temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
         LLVMBuildStore(builder, temp_res, result);
         lp_build_else(&ifthen);
         temp_res = LLVMBuildLoad(builder, result, "");
         temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
         LLVMBuildStore(builder, temp_res, result);
         lp_build_endif(&ifthen);
         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                                NULL, LLVMIntUGE);
         emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
      }
   }
}
3556
3557 static void
img_store_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3558 img_store_emit(
3559 const struct lp_build_tgsi_action * action,
3560 struct lp_build_tgsi_context * bld_base,
3561 struct lp_build_emit_data * emit_data)
3562 {
3563 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3564 struct lp_img_params params;
3565 LLVMValueRef coords[5];
3566 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3567 unsigned dims;
3568 unsigned target = emit_data->inst->Memory.Texture;
3569 unsigned layer_coord;
3570
3571 target_to_dims_layer(target, &dims, &layer_coord);
3572 for (unsigned i = 0; i < dims; i++) {
3573 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3574 }
3575 for (unsigned i = dims; i < 5; i++) {
3576 coords[i] = coord_undef;
3577 }
3578 if (layer_coord)
3579 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3580 memset(¶ms, 0, sizeof(params));
3581
3582 params.type = bld->bld_base.base.type;
3583 params.context_ptr = bld->context_ptr;
3584 params.thread_data_ptr = bld->thread_data_ptr;
3585 params.coords = coords;
3586 params.outdata = NULL;
3587 params.exec_mask = mask_vec(bld_base);
3588 params.target = tgsi_to_pipe_tex_target(target);
3589 params.image_index = emit_data->inst->Dst[0].Register.Index;
3590 params.img_op = LP_IMG_STORE;
3591 for (unsigned i = 0; i < 4; i++)
3592 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3593
3594 bld->image->emit_op(bld->image,
3595 bld->bld_base.base.gallivm,
3596 ¶ms);
3597 }
3598
3599 static void
store_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3600 store_emit(
3601 const struct lp_build_tgsi_action * action,
3602 struct lp_build_tgsi_context * bld_base,
3603 struct lp_build_emit_data * emit_data)
3604 {
3605 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3606 struct gallivm_state * gallivm = bld_base->base.gallivm;
3607 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3608 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3609 const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3610 unsigned buf = bufreg->Register.Index;
3611 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3612 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3613
3614 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3615 img_store_emit(action, bld_base, emit_data);
3616 } else if (0) {
3617
3618 } else {
3619 LLVMValueRef index; /* index into the const buffer */
3620 LLVMValueRef scalar_ptr;
3621 LLVMValueRef value;
3622 unsigned chan_index;
3623
3624 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3625 index = lp_build_shr_imm(uint_bld, index, 2);
3626
3627 scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3628
3629 LLVMValueRef ssbo_limit = NULL;
3630
3631 if (!is_shared) {
3632 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3633 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3634 }
3635
3636 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3637 LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3638
3639 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3640
3641 LLVMValueRef exec_mask = mask_vec(bld_base);
3642 if (!is_shared) {
3643 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3644 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3645 }
3646
3647 struct lp_build_loop_state loop_state;
3648 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3649
3650 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3651 loop_state.counter, "");
3652 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3653
3654 struct lp_build_if_state ifthen;
3655 LLVMValueRef cond;
3656
3657 loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3658 loop_state.counter, "");
3659
3660 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3661 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3662 lp_build_if(&ifthen, gallivm, cond);
3663
3664 lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3665
3666 lp_build_endif(&ifthen);
3667 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3668 NULL, LLVMIntUGE);
3669 }
3670 }
3671 }
3672
3673 static void
resq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3674 resq_emit(
3675 const struct lp_build_tgsi_action * action,
3676 struct lp_build_tgsi_context * bld_base,
3677 struct lp_build_emit_data * emit_data)
3678 {
3679 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3680 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3681 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3682
3683 unsigned buf = bufreg->Register.Index;
3684 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3685
3686 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3687 unsigned target = emit_data->inst->Memory.Texture;
3688 struct lp_sampler_size_query_params params = { 0 };
3689 params.int_type = bld->bld_base.int_bld.type;
3690 params.texture_unit = buf;
3691 params.target = tgsi_to_pipe_tex_target(target);
3692 params.context_ptr = bld->context_ptr;
3693 params.sizes_out = emit_data->output;
3694
3695 bld->image->emit_size_query(bld->image,
3696 bld->bld_base.base.gallivm,
3697 ¶ms);
3698 } else {
3699 LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3700
3701 emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3702 }
3703 }
3704
3705 static void
img_atomic_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data,LLVMAtomicRMWBinOp op)3706 img_atomic_emit(
3707 const struct lp_build_tgsi_action * action,
3708 struct lp_build_tgsi_context * bld_base,
3709 struct lp_build_emit_data * emit_data,
3710 LLVMAtomicRMWBinOp op)
3711 {
3712 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3713 struct lp_img_params params;
3714 LLVMValueRef coords[5];
3715 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3716 unsigned dims;
3717 unsigned layer_coord;
3718 unsigned target = emit_data->inst->Memory.Texture;
3719
3720 target_to_dims_layer(target, &dims, &layer_coord);
3721
3722 for (unsigned i = 0; i < dims; i++) {
3723 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3724 }
3725 for (unsigned i = dims; i < 5; i++) {
3726 coords[i] = coord_undef;
3727 }
3728 if (layer_coord)
3729 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3730 memset(¶ms, 0, sizeof(params));
3731
3732 params.type = bld->bld_base.base.type;
3733 params.context_ptr = bld->context_ptr;
3734 params.thread_data_ptr = bld->thread_data_ptr;
3735 params.exec_mask = mask_vec(bld_base);
3736 params.image_index = emit_data->inst->Src[0].Register.Index;
3737 params.coords = coords;
3738 params.target = tgsi_to_pipe_tex_target(target);
3739 params.op = op;
3740 params.outdata = emit_data->output;
3741 params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3742
3743 for (unsigned i = 0; i < 4; i++)
3744 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3745 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3746 for (unsigned i = 0; i < 4; i++)
3747 params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3748 }
3749 bld->image->emit_op(bld->image,
3750 bld->bld_base.base.gallivm,
3751 ¶ms);
3752 }
3753
3754 static void
atomic_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3755 atomic_emit(
3756 const struct lp_build_tgsi_action * action,
3757 struct lp_build_tgsi_context * bld_base,
3758 struct lp_build_emit_data * emit_data)
3759 {
3760 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3761 struct gallivm_state * gallivm = bld_base->base.gallivm;
3762 LLVMBuilderRef builder = gallivm->builder;
3763 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3764 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3765
3766 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3767 unsigned buf = bufreg->Register.Index;
3768 bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3769
3770 LLVMAtomicRMWBinOp op = -1;
3771 switch (emit_data->inst->Instruction.Opcode) {
3772 case TGSI_OPCODE_ATOMUADD:
3773 op = LLVMAtomicRMWBinOpAdd;
3774 break;
3775 case TGSI_OPCODE_ATOMXCHG:
3776 op = LLVMAtomicRMWBinOpXchg;
3777 break;
3778 case TGSI_OPCODE_ATOMAND:
3779 op = LLVMAtomicRMWBinOpAnd;
3780 break;
3781 case TGSI_OPCODE_ATOMOR:
3782 op = LLVMAtomicRMWBinOpOr;
3783 break;
3784 case TGSI_OPCODE_ATOMXOR:
3785 op = LLVMAtomicRMWBinOpXor;
3786 break;
3787 case TGSI_OPCODE_ATOMUMIN:
3788 op = LLVMAtomicRMWBinOpUMin;
3789 break;
3790 case TGSI_OPCODE_ATOMUMAX:
3791 op = LLVMAtomicRMWBinOpUMax;
3792 break;
3793 case TGSI_OPCODE_ATOMIMIN:
3794 op = LLVMAtomicRMWBinOpMin;
3795 break;
3796 case TGSI_OPCODE_ATOMIMAX:
3797 op = LLVMAtomicRMWBinOpMax;
3798 break;
3799 case TGSI_OPCODE_ATOMCAS:
3800 break;
3801 default:
3802 assert(0);
3803 return;
3804 }
3805
3806 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3807 img_atomic_emit(action, bld_base, emit_data, op);
3808 } else if (0) {
3809 } else {
3810 LLVMValueRef index; /* index into the const buffer */
3811 LLVMValueRef scalar, scalar_ptr;
3812 LLVMValueRef value;
3813
3814 index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3815 value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3816
3817 index = lp_build_shr_imm(uint_bld, index, 2);
3818
3819 if (!is_shared) {
3820 index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3821 scalar_ptr = bld->ssbos[buf];
3822 } else
3823 scalar_ptr = bld->shared_ptr;
3824
3825 LLVMValueRef atom_res = lp_build_alloca(gallivm,
3826 uint_bld->vec_type, "");
3827
3828 LLVMValueRef ssbo_limit;
3829 if (!is_shared) {
3830 ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3831 ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3832 }
3833
3834 LLVMValueRef exec_mask = mask_vec(bld_base);
3835
3836 if (!is_shared) {
3837 LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3838 exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3839 }
3840
3841 struct lp_build_loop_state loop_state;
3842 lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3843
3844 LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3845 loop_state.counter, "");
3846 value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3847
3848 index = LLVMBuildExtractElement(gallivm->builder, index,
3849 loop_state.counter, "");
3850
3851 scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
3852 &index, 1, "");
3853
3854 struct lp_build_if_state ifthen;
3855 LLVMValueRef cond, temp_res;
3856
3857 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3858 cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3859 lp_build_if(&ifthen, gallivm, cond);
3860
3861 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3862 LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3863 LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3864 loop_state.counter, "");
3865 cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3866 scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3867 cas_src_ptr,
3868 LLVMAtomicOrderingSequentiallyConsistent,
3869 LLVMAtomicOrderingSequentiallyConsistent,
3870 false);
3871 scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3872 } else {
3873 scalar = LLVMBuildAtomicRMW(builder, op,
3874 scalar_ptr, value_ptr,
3875 LLVMAtomicOrderingSequentiallyConsistent,
3876 false);
3877 }
3878 temp_res = LLVMBuildLoad(builder, atom_res, "");
3879 temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3880 LLVMBuildStore(builder, temp_res, atom_res);
3881 lp_build_else(&ifthen);
3882 temp_res = LLVMBuildLoad(builder, atom_res, "");
3883 temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3884 LLVMBuildStore(builder, temp_res, atom_res);
3885 lp_build_endif(&ifthen);
3886
3887 lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3888 NULL, LLVMIntUGE);
3889 emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
3890 }
3891 }
3892
3893 static void
barrier_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3894 barrier_emit(
3895 const struct lp_build_tgsi_action * action,
3896 struct lp_build_tgsi_context * bld_base,
3897 struct lp_build_emit_data * emit_data)
3898 {
3899 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3900 struct gallivm_state * gallivm = bld_base->base.gallivm;
3901
3902 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3903
3904 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3905 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3906 }
3907
3908 static void
membar_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3909 membar_emit(
3910 const struct lp_build_tgsi_action * action,
3911 struct lp_build_tgsi_context * bld_base,
3912 struct lp_build_emit_data * emit_data)
3913 {
3914 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3915 LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3916 }
3917
3918 static void
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3919 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3920 LLVMValueRef ptr,
3921 LLVMValueRef mask)
3922 {
3923 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3924 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3925
3926 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3927
3928 LLVMBuildStore(builder, current_vec, ptr);
3929 }
3930
3931 static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3932 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3933 LLVMValueRef ptr,
3934 LLVMValueRef mask)
3935 {
3936 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3937 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3938
3939 current_vec = lp_build_select(&bld_base->uint_bld,
3940 mask,
3941 bld_base->uint_bld.zero,
3942 current_vec);
3943
3944 LLVMBuildStore(builder, current_vec, ptr);
3945 }
3946
3947 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)3948 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3949 LLVMValueRef current_mask_vec,
3950 LLVMValueRef total_emitted_vertices_vec)
3951 {
3952 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3953 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3954 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3955 total_emitted_vertices_vec,
3956 bld->max_output_vertices_vec);
3957
3958 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3959 }
3960
3961 static void
emit_vertex(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3962 emit_vertex(
3963 const struct lp_build_tgsi_action * action,
3964 struct lp_build_tgsi_context * bld_base,
3965 struct lp_build_emit_data * emit_data)
3966 {
3967 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3968 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3969
3970 if (bld->gs_iface->emit_vertex) {
3971 LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
3972 TGSI_TYPE_UNSIGNED,
3973 emit_data->inst->Src[0].Register.SwizzleX);
3974 LLVMValueRef mask = mask_vec(bld_base);
3975 LLVMValueRef total_emitted_vertices_vec =
3976 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3977
3978 mask = clamp_mask_to_max_output_vertices(bld, mask,
3979 total_emitted_vertices_vec);
3980 gather_outputs(bld);
3981 bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
3982 bld->outputs,
3983 total_emitted_vertices_vec,
3984 mask,
3985 stream_id);
3986 increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
3987 mask);
3988 increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
3989 mask);
3990 #if DUMP_GS_EMITS
3991 lp_build_print_value(bld->bld_base.base.gallivm,
3992 " +++ emit vertex masked ones = ",
3993 mask);
3994 lp_build_print_value(bld->bld_base.base.gallivm,
3995 " +++ emit vertex emitted = ",
3996 total_emitted_vertices_vec);
3997 #endif
3998 }
3999 }
4000
4001
4002 static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,LLVMValueRef mask)4003 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
4004 LLVMValueRef mask)
4005 {
4006 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4007 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
4008
4009 if (bld->gs_iface->end_primitive) {
4010 struct lp_build_context *uint_bld = &bld_base->uint_bld;
4011 LLVMValueRef emitted_vertices_vec =
4012 LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
4013 LLVMValueRef emitted_prims_vec =
4014 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4015 LLVMValueRef total_emitted_vertices_vec =
4016 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4017 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4018 emitted_vertices_vec,
4019 uint_bld->zero);
4020 /* We need to combine the current execution mask with the mask
4021 telling us which, if any, execution slots actually have
4022 unemitted primitives, this way we make sure that end_primitives
4023 executes only on the paths that have unflushed vertices */
4024 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
4025
4026 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
4027 total_emitted_vertices_vec,
4028 emitted_vertices_vec,
4029 emitted_prims_vec,
4030 mask_vec(bld_base), 0);
4031
4032 #if DUMP_GS_EMITS
4033 lp_build_print_value(bld->bld_base.base.gallivm,
4034 " +++ end prim masked ones = ",
4035 mask);
4036 lp_build_print_value(bld->bld_base.base.gallivm,
4037 " +++ end prim emitted verts1 = ",
4038 emitted_vertices_vec);
4039 lp_build_print_value(bld->bld_base.base.gallivm,
4040 " +++ end prim emitted prims1 = ",
4041 LLVMBuildLoad(builder,
4042 bld->emitted_prims_vec_ptr, ""));
4043 #endif
4044 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
4045 mask);
4046 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
4047 mask);
4048 #if DUMP_GS_EMITS
4049 lp_build_print_value(bld->bld_base.base.gallivm,
4050 " +++ end prim emitted verts2 = ",
4051 LLVMBuildLoad(builder,
4052 bld->emitted_vertices_vec_ptr, ""));
4053 #endif
4054 }
4055
4056 }
4057
4058 static void
end_primitive(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4059 end_primitive(
4060 const struct lp_build_tgsi_action * action,
4061 struct lp_build_tgsi_context * bld_base,
4062 struct lp_build_emit_data * emit_data)
4063 {
4064 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4065
4066 if (bld->gs_iface->end_primitive) {
4067 LLVMValueRef mask = mask_vec(bld_base);
4068 end_primitive_masked(bld_base, mask);
4069 }
4070 }
4071
4072 static void
barrier_emit_tcs(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4073 barrier_emit_tcs(
4074 const struct lp_build_tgsi_action * action,
4075 struct lp_build_tgsi_context * bld_base,
4076 struct lp_build_emit_data * emit_data)
4077 {
4078 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4079
4080 if (bld->tcs_iface->emit_barrier) {
4081 bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
4082 }
4083 }
4084
4085
4086 static void
cal_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4087 cal_emit(
4088 const struct lp_build_tgsi_action * action,
4089 struct lp_build_tgsi_context * bld_base,
4090 struct lp_build_emit_data * emit_data)
4091 {
4092 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4093
4094 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
4095 &bld_base->pc);
4096 }
4097
4098 static void
ret_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4099 ret_emit(
4100 const struct lp_build_tgsi_action * action,
4101 struct lp_build_tgsi_context * bld_base,
4102 struct lp_build_emit_data * emit_data)
4103 {
4104 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4105
4106 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
4107 }
4108
4109 static void
brk_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4110 brk_emit(
4111 const struct lp_build_tgsi_action * action,
4112 struct lp_build_tgsi_context * bld_base,
4113 struct lp_build_emit_data * emit_data)
4114 {
4115 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4116
4117 lp_exec_tgsi_break(&bld->exec_mask, bld_base);
4118 }
4119
4120 static void
if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4121 if_emit(
4122 const struct lp_build_tgsi_action * action,
4123 struct lp_build_tgsi_context * bld_base,
4124 struct lp_build_emit_data * emit_data)
4125 {
4126 LLVMValueRef tmp;
4127 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4128
4129 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
4130 emit_data->args[0], bld->bld_base.base.zero);
4131 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4132 }
4133
4134 static void
uif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4135 uif_emit(
4136 const struct lp_build_tgsi_action * action,
4137 struct lp_build_tgsi_context * bld_base,
4138 struct lp_build_emit_data * emit_data)
4139 {
4140 LLVMValueRef tmp;
4141 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4142 struct lp_build_context *uint_bld = &bld_base->uint_bld;
4143
4144 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4145 emit_data->args[0], uint_bld->zero);
4146 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4147 }
4148
4149 static void
case_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4150 case_emit(
4151 const struct lp_build_tgsi_action * action,
4152 struct lp_build_tgsi_context * bld_base,
4153 struct lp_build_emit_data * emit_data)
4154 {
4155 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4156
4157 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
4158 }
4159
4160 static void
default_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4161 default_emit(
4162 const struct lp_build_tgsi_action * action,
4163 struct lp_build_tgsi_context * bld_base,
4164 struct lp_build_emit_data * emit_data)
4165 {
4166 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4167
4168 lp_exec_default(&bld->exec_mask, bld_base);
4169 }
4170
4171 static void
switch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4172 switch_emit(
4173 const struct lp_build_tgsi_action * action,
4174 struct lp_build_tgsi_context * bld_base,
4175 struct lp_build_emit_data * emit_data)
4176 {
4177 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4178
4179 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
4180 }
4181
4182 static void
endswitch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4183 endswitch_emit(
4184 const struct lp_build_tgsi_action * action,
4185 struct lp_build_tgsi_context * bld_base,
4186 struct lp_build_emit_data * emit_data)
4187 {
4188 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4189
4190 lp_exec_endswitch(&bld->exec_mask, bld_base);
4191 }
4192
4193 static void
bgnloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4194 bgnloop_emit(
4195 const struct lp_build_tgsi_action * action,
4196 struct lp_build_tgsi_context * bld_base,
4197 struct lp_build_emit_data * emit_data)
4198 {
4199 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4200
4201 lp_exec_bgnloop(&bld->exec_mask, true);
4202 }
4203
4204 static void
bgnsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4205 bgnsub_emit(
4206 const struct lp_build_tgsi_action * action,
4207 struct lp_build_tgsi_context * bld_base,
4208 struct lp_build_emit_data * emit_data)
4209 {
4210 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4211
4212 lp_exec_mask_bgnsub(&bld->exec_mask);
4213 }
4214
4215 static void
else_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4216 else_emit(
4217 const struct lp_build_tgsi_action * action,
4218 struct lp_build_tgsi_context * bld_base,
4219 struct lp_build_emit_data * emit_data)
4220 {
4221 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4222
4223 lp_exec_mask_cond_invert(&bld->exec_mask);
4224 }
4225
4226 static void
endif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4227 endif_emit(
4228 const struct lp_build_tgsi_action * action,
4229 struct lp_build_tgsi_context * bld_base,
4230 struct lp_build_emit_data * emit_data)
4231 {
4232 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4233
4234 lp_exec_mask_cond_pop(&bld->exec_mask);
4235 }
4236
4237 static void
endloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4238 endloop_emit(
4239 const struct lp_build_tgsi_action * action,
4240 struct lp_build_tgsi_context * bld_base,
4241 struct lp_build_emit_data * emit_data)
4242 {
4243 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4244
4245 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
4246 }
4247
4248 static void
endsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4249 endsub_emit(
4250 const struct lp_build_tgsi_action * action,
4251 struct lp_build_tgsi_context * bld_base,
4252 struct lp_build_emit_data * emit_data)
4253 {
4254 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4255
4256 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
4257 }
4258
4259 static void
cont_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4260 cont_emit(
4261 const struct lp_build_tgsi_action * action,
4262 struct lp_build_tgsi_context * bld_base,
4263 struct lp_build_emit_data * emit_data)
4264 {
4265 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4266
4267 lp_exec_continue(&bld->exec_mask);
4268 }
4269
emit_prologue(struct lp_build_tgsi_context * bld_base)4270 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
4271 {
4272 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4273 struct gallivm_state * gallivm = bld_base->base.gallivm;
4274
4275 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
4276 unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
4277 bld->temps_array = lp_build_alloca_undef(gallivm,
4278 LLVMArrayType(bld_base->base.vec_type, array_size),
4279 "temp_array");
4280 }
4281
4282 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
4283 LLVMValueRef array_size =
4284 lp_build_const_int32(gallivm,
4285 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
4286 bld->outputs_array = lp_build_array_alloca(gallivm,
4287 bld_base->base.vec_type, array_size,
4288 "output_array");
4289 }
4290
4291 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
4292 unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
4293 bld->imms_array = lp_build_alloca_undef(gallivm,
4294 LLVMArrayType(bld_base->base.vec_type, array_size),
4295 "imms_array");
4296 }
4297
4298 /* If we have indirect addressing in inputs we need to copy them into
4299 * our alloca array to be able to iterate over them */
4300 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
4301 !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
4302 unsigned index, chan;
4303 LLVMTypeRef vec_type = bld_base->base.vec_type;
4304 LLVMValueRef array_size = lp_build_const_int32(gallivm,
4305 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
4306 bld->inputs_array = lp_build_array_alloca(gallivm,
4307 vec_type, array_size,
4308 "input_array");
4309
4310 assert(bld_base->info->num_inputs
4311 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
4312
4313 for (index = 0; index < bld_base->info->num_inputs; ++index) {
4314 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
4315 LLVMValueRef lindex =
4316 lp_build_const_int32(gallivm, index * 4 + chan);
4317 LLVMValueRef input_ptr =
4318 LLVMBuildGEP(gallivm->builder, bld->inputs_array,
4319 &lindex, 1, "");
4320 LLVMValueRef value = bld->inputs[index][chan];
4321 if (value)
4322 LLVMBuildStore(gallivm->builder, value, input_ptr);
4323 }
4324 }
4325 }
4326
4327 if (bld->gs_iface) {
4328 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
4329 bld->emitted_prims_vec_ptr =
4330 lp_build_alloca(gallivm,
4331 uint_bld->vec_type,
4332 "emitted_prims_ptr");
4333 bld->emitted_vertices_vec_ptr =
4334 lp_build_alloca(gallivm,
4335 uint_bld->vec_type,
4336 "emitted_vertices_ptr");
4337 bld->total_emitted_vertices_vec_ptr =
4338 lp_build_alloca(gallivm,
4339 uint_bld->vec_type,
4340 "total_emitted_vertices_ptr");
4341
4342 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4343 bld->emitted_prims_vec_ptr);
4344 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4345 bld->emitted_vertices_vec_ptr);
4346 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4347 bld->total_emitted_vertices_vec_ptr);
4348 }
4349
4350 if (DEBUG_EXECUTION) {
4351 lp_build_printf(gallivm, "\n");
4352 emit_dump_file(bld, TGSI_FILE_CONSTANT);
4353 if (!bld->gs_iface)
4354 emit_dump_file(bld, TGSI_FILE_INPUT);
4355 }
4356 }
4357
emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)4358 static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
4359 {
4360 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4361
4362 if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
4363 bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
4364 }
4365 }
4366
emit_epilogue(struct lp_build_tgsi_context * bld_base)4367 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
4368 {
4369 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4370 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
4371
4372 if (DEBUG_EXECUTION) {
4373 /* for debugging */
4374 if (0) {
4375 emit_dump_file(bld, TGSI_FILE_TEMPORARY);
4376 }
4377 emit_dump_file(bld, TGSI_FILE_OUTPUT);
4378 lp_build_printf(bld_base->base.gallivm, "\n");
4379 }
4380
4381 if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
4382 bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
4383 }
4384
4385 /* If we have indirect addressing in outputs we need to copy our alloca array
4386 * to the outputs slots specified by the caller */
4387 if (bld->gs_iface) {
4388 LLVMValueRef total_emitted_vertices_vec;
4389 LLVMValueRef emitted_prims_vec;
4390 /* implicit end_primitives, needed in case there are any unflushed
4391 vertices in the cache. Note must not call end_primitive here
4392 since the exec_mask is not valid at this point. */
4393 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
4394
4395 total_emitted_vertices_vec =
4396 LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
4397 emitted_prims_vec =
4398 LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
4399
4400 bld->gs_iface->gs_epilogue(bld->gs_iface,
4401 total_emitted_vertices_vec,
4402 emitted_prims_vec, 0);
4403 } else {
4404 gather_outputs(bld);
4405 }
4406 }
4407
/**
 * Translate a TGSI token stream into LLVM IR in SoA layout
 * (each register channel is a vector, one lane per pixel/vertex).
 *
 * \param gallivm  gallivm/LLVM context the IR is emitted into
 * \param tokens   the TGSI shader to translate
 * \param params   caller-supplied state: vector type, constants, samplers,
 *                 images, execution mask, and optional GS/TCS/TES interfaces
 * \param outputs  receives an LLVMValueRef per output register channel
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  const struct lp_build_tgsi_params *params,
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
{
   struct lp_build_tgsi_soa_context bld;
   struct lp_type type = params->type;
   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* Signed integer type with the same width/length as the main type. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   /* Scalar-element build context (one element of the vector type). */
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   /* Double-width float context (e.g. fp64 ops on an fp32 base type). */
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   /* Double-width unsigned integer context (64-bit uint ops). */
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   /* Double-width signed integer context (64-bit int ops). */
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   /* Copy caller-provided state into the SoA context. */
   bld.mask = params->mask;
   bld.inputs = params->inputs;
   bld.outputs = outputs;
   bld.consts_ptr = params->consts_ptr;
   bld.const_sizes_ptr = params->const_sizes_ptr;
   bld.ssbo_ptr = params->ssbo_ptr;
   bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
   bld.sampler = params->sampler;
   bld.bld_base.info = params->info;
   bld.indirect_files = params->info->indirect_files;
   bld.context_ptr = params->context_ptr;
   bld.thread_data_ptr = params->thread_data_ptr;
   bld.image = params->image;
   bld.shared_ptr = params->shared_ptr;
   bld.coro = params->coro;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * like indirect temporaries.
    */
   if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reason immediates are always backed in a static
    * array, but if their number is too great, we have to use just
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
         (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }


   /* Register fetch callbacks, one per TGSI register file. */
   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;

   /* Register store callbacks for the writable register files. */
   bld.bld_base.emit_store = emit_store;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   /* Control flow, derivative and texture opcodes that need SoA-specific
    * handling on top of the default CPU actions. */
   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;

   /* Memory (SSBO/image) opcodes. */
   bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;

   /* All atomic ops share a single handler. */
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;

   if (params->gs_iface) {
      /* There's no specific value for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often
       * were forgetting so we're using MAX_VERTEX_VARYING from
       * that spec even though we could debug_assert if it's not
       * set, but that's a lot uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = params->gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
            params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   if (params->tes_iface) {
      /* inputs are always indirect with tes */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.tes_iface = params->tes_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
   }

   if (params->tcs_iface) {
      bld.tcs_iface = params->tcs_iface;
      /* outputs and inputs are always indirect with tcs */
      bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
      bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
      /* TCS can read back its own outputs, fetched through the same path. */
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *params->system_values;

   /* Run the generic TGSI->LLVM translator with the callbacks above. */
   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   /* Debug aid: dump TGSI and the generated function (disabled). */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   /* Debug aid: dump the whole LLVM module (disabled). */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);

   }
   lp_exec_mask_fini(&bld.exec_mask);
}
4638