1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **************************************************************************/
25 
26 #include "lp_bld_nir.h"
27 #include "lp_bld_arit.h"
28 #include "lp_bld_bitarit.h"
29 #include "lp_bld_const.h"
30 #include "lp_bld_conv.h"
31 #include "lp_bld_gather.h"
32 #include "lp_bld_logic.h"
33 #include "lp_bld_quad.h"
34 #include "lp_bld_flow.h"
35 #include "lp_bld_intr.h"
36 #include "lp_bld_struct.h"
37 #include "lp_bld_debug.h"
38 #include "lp_bld_printf.h"
39 #include "nir_deref.h"
40 #include "nir_search_helpers.h"
41 
42 static void visit_cf_list(struct lp_build_nir_context *bld_base,
43                           struct exec_list *list);
44 
/*
 * Bitcast a vector value to the lp build vector type matching the given
 * NIR ALU type and bit width.  ALU types the switch does not know about
 * are returned unchanged; an unsupported bit size asserts and yields NULL.
 */
static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val,
                              nir_alu_type alu_type, unsigned bit_size)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMTypeRef dst_type = NULL;

   switch (alu_type) {
   case nir_type_float:
      switch (bit_size) {
      case 16:
         dst_type = bld_base->half_bld.vec_type;
         break;
      case 32:
         dst_type = bld_base->base.vec_type;
         break;
      case 64:
         dst_type = bld_base->dbl_bld.vec_type;
         break;
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_int:
      switch (bit_size) {
      case 8:
         dst_type = bld_base->int8_bld.vec_type;
         break;
      case 16:
         dst_type = bld_base->int16_bld.vec_type;
         break;
      case 32:
         dst_type = bld_base->int_bld.vec_type;
         break;
      case 64:
         dst_type = bld_base->int64_bld.vec_type;
         break;
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint:
      switch (bit_size) {
      case 8:
         dst_type = bld_base->uint8_bld.vec_type;
         break;
      case 16:
         dst_type = bld_base->uint16_bld.vec_type;
         break;
      case 1: /* 1-bit booleans are carried in 32-bit lanes */
      case 32:
         dst_type = bld_base->uint_bld.vec_type;
         break;
      case 64:
         dst_type = bld_base->uint64_bld.vec_type;
         break;
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint32:
      dst_type = bld_base->uint_bld.vec_type;
      break;
   default:
      /* no cast required for other ALU types */
      return val;
   }

   return dst_type ? LLVMBuildBitCast(builder, val, dst_type, "") : NULL;
}
101 
102 
/*
 * Map a GLSL sampler dimension (plus arrayness) to the corresponding
 * pipe_texture_target.  Unknown dimensions fall back to PIPE_BUFFER.
 */
static unsigned glsl_sampler_to_pipe(int sampler_dim, bool is_array)
{
   switch (sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
      return is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_MS: /* multisample resources use the 2D targets */
      return is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      /* input attachments are always treated as 2D arrays */
      return PIPE_TEXTURE_2D_ARRAY;
   case GLSL_SAMPLER_DIM_3D:
      return PIPE_TEXTURE_3D;
   case GLSL_SAMPLER_DIM_CUBE:
      return is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
   case GLSL_SAMPLER_DIM_RECT:
      return PIPE_TEXTURE_RECT;
   case GLSL_SAMPLER_DIM_BUF:
   default:
      return PIPE_BUFFER;
   }
}
137 
/* Return the LLVM value previously recorded for the given NIR SSA def. */
static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa)
{
   return bld_base->ssa_defs[ssa->index];
}
142 
143 static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src);
144 
/* Load the current value of a NIR virtual-register source, resolving any
 * indirect index first.  Register storage was allocated up front and stored
 * in the bld_base->regs hash table.
 * NOTE(review): the hash lookup result is not NULL-checked — presumably every
 * register is pre-registered before translation; confirm against the setup code.
 */
static LLVMValueRef get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src)
{
   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg);
   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
   /* registers are accessed as integer lanes of the register's bit size */
   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size);
   LLVMValueRef indir_src = NULL;
   if (src.indirect)
      indir_src = get_src(bld_base, *src.indirect);
   return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage);
}
155 
get_src(struct lp_build_nir_context * bld_base,nir_src src)156 static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src)
157 {
158    if (src.is_ssa)
159       return get_ssa_src(bld_base, src.ssa);
160    else
161       return get_reg_src(bld_base, src.reg);
162 }
163 
/* Record the LLVM value produced for the SSA def with index idx. */
static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr)
{
   bld_base->ssa_defs[idx] = ptr;
}
168 
/* Store the result values for an SSA destination.  A single component is
 * kept as-is; multiple components are packed into one LLVM aggregate via
 * lp_nir_array_build_gather_values(). */
static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
                            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   assign_ssa(bld_base, ssa->index, ssa->num_components == 1 ? vals[0] : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder, vals, ssa->num_components));
}
174 
/* Store result values into a NIR virtual-register destination, resolving an
 * indirect index if present.  A write_mask of 0 means "write all components"
 * and is mapped to 0xf before being passed on.
 * NOTE(review): the hash lookup result is not NULL-checked — registers are
 * presumably pre-registered; confirm against the setup code.
 */
static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
                       unsigned write_mask,
                       LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, reg->reg);
   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, reg->reg->bit_size);
   LLVMValueRef indir_src = NULL;
   if (reg->indirect)
      indir_src = get_src(bld_base, *reg->indirect);
   bld_base->store_reg(bld_base, reg_bld, reg, write_mask ? write_mask : 0xf, indir_src, reg_storage, vals);
}
187 
/* Store result values into a NIR destination, dispatching on whether it is
 * an SSA def or a virtual register (full write mask). */
static void assign_dest(struct lp_build_nir_context *bld_base, const nir_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (!dest->is_ssa) {
      assign_reg(bld_base, &dest->reg, 0, vals);
      return;
   }
   assign_ssa_dest(bld_base, &dest->ssa, vals);
}
195 
/* Store ALU result values into the instruction's destination, honoring the
 * ALU write mask for the register case. */
static void assign_alu_dest(struct lp_build_nir_context *bld_base, const nir_alu_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (!dest->dest.is_ssa) {
      assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
      return;
   }
   assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
}
203 
/* Convert an integer of src_bit_size to a 32-bit boolean mask: ~0u per lane
 * that is non-zero, 0u otherwise.
 *
 * lp_build_compare() yields a mask with the source's lane width, so narrower
 * masks must be sign-extended to 32 bits (sign-extension preserves the
 * all-ones pattern) and 64-bit masks truncated.  The extension test used to
 * be "== 16", which left an 8-bit source's mask at 8 bits; use "< 32" so
 * 8-bit sources are widened too, matching icmp32().
 */
static LLVMValueRef int_to_bool32(struct lp_build_nir_context *bld_base,
                                uint32_t src_bit_size,
                                bool is_unsigned,
                                LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_compare(bld_base->base.gallivm, int_bld->type, PIPE_FUNC_NOTEQUAL, val, int_bld->zero);
   if (src_bit_size < 32)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}
218 
/* Convert a float of src_bit_size to a 32-bit boolean mask (~0u for any lane
 * that compares not-equal to zero).  The compare mask has the source's lane
 * width, so 16-bit masks are sign-extended and 64-bit masks truncated. */
static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base,
                                  uint32_t src_bit_size,
                                  LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef mask = lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);
   if (src_bit_size == 16)
      mask = LLVMBuildSExt(builder, mask, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      mask = LLVMBuildTrunc(builder, mask, bld_base->int_bld.vec_type, "");
   return mask;
}
232 
/* Emit a NIR float comparison producing a full-width (~0/0) mask in 32-bit
 * lanes.  Every function except NOTEQUAL is emitted as an ordered compare
 * (false when either operand is NaN); NOTEQUAL stays unordered so that
 * NaN != x yields true.  The raw mask has the source lane width: 64-bit
 * masks are truncated and 16-bit masks sign-extended to 32 bits.
 */
static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef result;

   if (compare != PIPE_FUNC_NOTEQUAL)
      result = lp_build_cmp_ordered(flt_bld, compare, src[0], src[1]);
   else
      result = lp_build_cmp(flt_bld, compare, src[0], src[1]);
   if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 16)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}
252 
/* Emit a NIR integer comparison producing a full-width (~0/0) mask in 32-bit
 * lanes.  Sub-32-bit masks are sign-extended (preserving the all-ones
 * pattern) and 64-bit masks truncated to 32 bits.
 */
static LLVMValueRef icmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           bool is_unsigned,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *i_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_cmp(i_bld, compare, src[0], src[1]);
   if (src_bit_size < 32)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}
268 
/* Fetch an ALU source and apply its swizzle so the result has exactly
 * num_components components.
 *
 * Values here are LLVM arrays of per-component SoA vectors, hence the
 * ExtractValue/InsertValue shuffling.  Three cases:
 *   - vector source, scalar use: extract the single swizzled component;
 *   - scalar source, vector use: splat the value num_components times
 *     (the 16-entry initializer covers NIR's maximum vector width);
 *   - general case: rebuild an array picking each swizzled component.
 * Source modifiers (negate/abs) are expected to have been lowered away.
 */
static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base,
                                nir_alu_src src,
                                unsigned num_components)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef value = get_src(bld_base, src.src);
   bool need_swizzle = false;

   assert(value);
   unsigned src_components = nir_src_num_components(src.src);
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      if (src_components > 1 && num_components == 1) {
         value = LLVMBuildExtractValue(gallivm->builder, value,
                                       src.swizzle[0], "");
      } else if (src_components == 1 && num_components > 1) {
         LLVMValueRef values[] = {value, value, value, value, value, value, value, value, value, value, value, value, value, value, value, value};
         value = lp_nir_array_build_gather_values(builder, values, num_components);
      } else {
         LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
         for (unsigned i = 0; i < num_components; i++)
            arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, "");
         value = arr;
      }
   }
   assert(!src.negate);
   assert(!src.abs);
   return value;
}
304 
/* Convert a 32-bit boolean mask (~0u/0u per lane) to a float 1.0/0.0.
 * ANDs the mask with the bit pattern of 1.0f — all-ones lanes keep the
 * pattern (1.0f), zero lanes become 0.0f — then converts the 32-bit float
 * result to the requested destination bit size.
 */
static LLVMValueRef emit_b2f(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef result = LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
                                      LLVMBuildBitCast(builder, lp_build_const_vec(bld_base->base.gallivm, bld_base->base.type,
                                                                                   1.0), bld_base->int_bld.vec_type, ""),
                                      "");
   result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
   switch (bitsize) {
   case 16:
      result = LLVMBuildFPTrunc(builder, result, bld_base->half_bld.vec_type, "");
      break;
   case 32:
      break;
   case 64:
      result = LLVMBuildFPExt(builder, result, bld_base->dbl_bld.vec_type, "");
      break;
   default:
      unreachable("unsupported bit size.");
   }
   return result;
}
329 
/* Convert a 32-bit boolean mask (~0u/0u per lane) to an integer 1/0 of the
 * requested bit size: AND with 1, then truncate or zero-extend as needed. */
static LLVMValueRef emit_b2i(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef masked = LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
                                      lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, 1), "");
   if (bitsize == 32)
      return masked;
   if (bitsize == 8)
      return LLVMBuildTrunc(builder, masked, bld_base->int8_bld.vec_type, "");
   if (bitsize == 16)
      return LLVMBuildTrunc(builder, masked, bld_base->int16_bld.vec_type, "");
   if (bitsize == 64)
      return LLVMBuildZExt(builder, masked, bld_base->int64_bld.vec_type, "");
   unreachable("unsupported bit size.");
}
350 
/* nir_op_b32csel: per-lane select of src[1] where the 32-bit boolean src[0]
 * is non-zero, else src[2].  The compare re-normalizes the condition to a
 * full mask; the select operates in the integer context matching the data's
 * bit size. */
static LLVMValueRef emit_b32csel(struct lp_build_nir_context *bld_base,
                               unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
                               LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef sel = cast_type(bld_base, src[0], nir_type_int, 32);
   LLVMValueRef v = lp_build_compare(bld_base->base.gallivm, bld_base->int_bld.type, PIPE_FUNC_NOTEQUAL, sel, bld_base->int_bld.zero);
   struct lp_build_context *bld = get_int_bld(bld_base, false, src_bit_size[1]);
   return lp_build_select(bld, v, src[1], src[2]);
}
360 
/* View a vector of 64-bit values as pairs of 32-bit words and extract either
 * the low (hi == false) or high (hi == true) word of each lane.  The source
 * is bitcast to an i32 vector of twice the lane count, and an endian-aware
 * shuffle picks the even or odd words. */
static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* little endian: low word first in memory */
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}
386 
/* Inverse of split_64bit: interleave two 32-bit vectors (low and high words)
 * into one vector of 2*length 32-bit lanes — i.e. the raw bits of a 64-bit
 * vector — with the word order chosen by host endianness. */
static LLVMValueRef
merge_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* lane i/2 of input (low word) then lane i/2 of input2 (high word) */
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
410 
/* View a 32-bit vector as pairs of 16-bit halves and extract either the low
 * (hi == false) or high (hi == true) half of each lane.  The source is
 * bitcast to an i16 vector of twice the lane count, and an endian-aware
 * shuffle picks the even or odd halves. */
static LLVMValueRef split_16bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* little endian: low half first in memory */
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}
/* Inverse of split_16bit: interleave two 16-bit half vectors back into one
 * vector of 2*length 16-bit lanes (the raw bits of a 32-bit vector), with
 * the half order chosen by host endianness.
 * NOTE(review): len uses int16_bld.type.length while the loop bound uses
 * int_bld.type.length and the offsets use base.type.length — these appear
 * to all be the same lane count here; confirm against the context setup.
 */
static LLVMValueRef
merge_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->int16_bld.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->int_bld.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* lane i/2 of input (low half) then lane i/2 of input2 (high half) */
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
459 
/* Patch a signed divisor to avoid the remaining trap case after zero
 * divisors have been handled: INT_MIN / -1 overflows (raises SIGFPE on
 * x86).  Lanes where src equals the most negative value for the source
 * width AND divisor == -1 get the divisor replaced with 1. */
static LLVMValueRef get_signed_divisor(struct gallivm_state *gallivm,
                                       struct lp_build_context *int_bld,
                                       struct lp_build_context *mask_bld,
                                       int src_bit_size,
                                       LLVMValueRef src, LLVMValueRef divisor)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* However for signed divides SIGFPE can occur if the numerator is INT_MIN
      and divisor is -1. */
   /* set mask if numerator == INT_MIN */
   long long min_val;
   switch (src_bit_size) {
   case 8:
      min_val = INT8_MIN;
      break;
   case 16:
      min_val = INT16_MIN;
      break;
   default: /* intentionally grouped with the 32-bit case */
   case 32:
      min_val = INT_MIN;
      break;
   case 64:
      min_val = INT64_MIN;
      break;
   }
   LLVMValueRef div_mask2 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src,
                                         lp_build_const_int_vec(gallivm, int_bld->type, min_val));
   /* set another mask if divisor is - 1 */
   LLVMValueRef div_mask3 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor,
                                         lp_build_const_int_vec(gallivm, int_bld->type, -1));
   div_mask2 = LLVMBuildAnd(builder, div_mask2, div_mask3, "");

   divisor = lp_build_select(mask_bld, div_mask2, int_bld->one, divisor);
   return divisor;
}
496 
/* Integer divide that never traps.  Lanes dividing by zero have the divisor
 * forced to all-ones by ORing in the ==0 mask; signed divides additionally
 * patch the INT_MIN / -1 overflow case via get_signed_divisor().  Divide-by-
 * zero lanes return ~0 for unsigned (d3d10 mandates 0xffffffff) and 0 for
 * signed (masked off with the inverted zero-divisor mask). */
static LLVMValueRef
do_int_divide(struct lp_build_nir_context *bld_base,
              bool is_unsigned, unsigned src_bit_size,
              LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);

   /* avoid divide by 0. Converted divisor from 0 to -1 */
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);

   LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, "");
   if (!is_unsigned) {
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_div(int_bld, src, divisor);

   if (!is_unsigned) {
      LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
      return LLVMBuildAnd(builder, not_div_mask, result, "");
   } else
      /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10
       * may as well do same for idiv */
      return LLVMBuildOr(builder, div_mask, result, "");
}
526 
/* Integer modulo that never traps.  Mirrors do_int_divide(): zero divisors
 * are patched to all-ones before the mod (and INT_MIN % -1 is patched for
 * signed), then the zero-divisor mask is OR'd into the result so x % 0
 * lanes return ~0. */
static LLVMValueRef
do_int_mod(struct lp_build_nir_context *bld_base,
           bool is_unsigned, unsigned src_bit_size,
           LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);
   LLVMValueRef divisor = LLVMBuildOr(builder,
                                      div_mask,
                                      src2, "");
   if (!is_unsigned) {
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
   return LLVMBuildOr(builder, div_mask, result, "");
}
548 
/* Quantize a 32-bit float through half precision (nir_op_fquantize2f16):
 * round-trip via fptrunc/fpext, then flush half-denormal results to zero.
 * 0x38800000 is the bit pattern of 2^-14, the smallest normal half-float;
 * lanes whose magnitude is below it but ordered-not-equal to zero are
 * replaced with +0. */
static LLVMValueRef
do_quantize_to_f16(struct lp_build_nir_context *bld_base,
                   LLVMValueRef src)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result, cond, cond2, temp;

   result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, "");
   result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");

   temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
   cond = LLVMBuildFCmp(builder, LLVMRealOGT,
                        LLVMBuildBitCast(builder, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, 0x38800000), bld_base->base.vec_type, ""),
                        temp, "");
   cond2 = LLVMBuildFCmp(builder, LLVMRealONE, temp, bld_base->base.zero, "");
   cond = LLVMBuildAnd(builder, cond, cond2, "");
   result = LLVMBuildSelect(builder, cond, bld_base->base.zero, result, "");
   return result;
}
569 
do_alu_action(struct lp_build_nir_context * bld_base,const nir_alu_instr * instr,unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])570 static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
571                                   const nir_alu_instr *instr,
572                                   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
573                                   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
574 {
575    struct gallivm_state *gallivm = bld_base->base.gallivm;
576    LLVMBuilderRef builder = gallivm->builder;
577    LLVMValueRef result;
578 
579    switch (instr->op) {
580    case nir_op_b2f16:
581       result = emit_b2f(bld_base, src[0], 16);
582       break;
583    case nir_op_b2f32:
584       result = emit_b2f(bld_base, src[0], 32);
585       break;
586    case nir_op_b2f64:
587       result = emit_b2f(bld_base, src[0], 64);
588       break;
589    case nir_op_b2i8:
590       result = emit_b2i(bld_base, src[0], 8);
591       break;
592    case nir_op_b2i16:
593       result = emit_b2i(bld_base, src[0], 16);
594       break;
595    case nir_op_b2i32:
596       result = emit_b2i(bld_base, src[0], 32);
597       break;
598    case nir_op_b2i64:
599       result = emit_b2i(bld_base, src[0], 64);
600       break;
601    case nir_op_b32csel:
602       result = emit_b32csel(bld_base, src_bit_size, src);
603       break;
604    case nir_op_bit_count:
605       result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
606       if (src_bit_size[0] < 32)
607          result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, "");
608       else if (src_bit_size[0] > 32)
609          result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
610       break;
611    case nir_op_bitfield_select:
612       result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
613       break;
614    case nir_op_bitfield_reverse:
615       result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
616       break;
617    case nir_op_f2b32:
618       result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
619       break;
620    case nir_op_f2f16:
621       if (src_bit_size[0] == 64)
622          src[0] = LLVMBuildFPTrunc(builder, src[0],
623                                    bld_base->base.vec_type, "");
624       result = LLVMBuildFPTrunc(builder, src[0],
625                                 bld_base->half_bld.vec_type, "");
626       break;
627    case nir_op_f2f32:
628       if (src_bit_size[0] < 32)
629          result = LLVMBuildFPExt(builder, src[0],
630                                  bld_base->base.vec_type, "");
631       else
632          result = LLVMBuildFPTrunc(builder, src[0],
633                                    bld_base->base.vec_type, "");
634       break;
635    case nir_op_f2f64:
636       result = LLVMBuildFPExt(builder, src[0],
637                               bld_base->dbl_bld.vec_type, "");
638       break;
639    case nir_op_f2i8:
640       result = LLVMBuildFPToSI(builder,
641                                src[0],
642                                bld_base->uint8_bld.vec_type, "");
643       break;
644    case nir_op_f2i16:
645       result = LLVMBuildFPToSI(builder,
646                                src[0],
647                                bld_base->uint16_bld.vec_type, "");
648       break;
649    case nir_op_f2i32:
650       result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, "");
651       break;
652    case nir_op_f2u8:
653       result = LLVMBuildFPToUI(builder,
654                                src[0],
655                                bld_base->uint8_bld.vec_type, "");
656       break;
657    case nir_op_f2u16:
658       result = LLVMBuildFPToUI(builder,
659                                src[0],
660                                bld_base->uint16_bld.vec_type, "");
661       break;
662    case nir_op_f2u32:
663       result = LLVMBuildFPToUI(builder,
664                                src[0],
665                                bld_base->base.int_vec_type, "");
666       break;
667    case nir_op_f2i64:
668       result = LLVMBuildFPToSI(builder,
669                                src[0],
670                                bld_base->int64_bld.vec_type, "");
671       break;
672    case nir_op_f2u64:
673       result = LLVMBuildFPToUI(builder,
674                                src[0],
675                                bld_base->uint64_bld.vec_type, "");
676       break;
677    case nir_op_fabs:
678       result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
679       break;
680    case nir_op_fadd:
681       result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
682                             src[0], src[1]);
683       break;
684    case nir_op_fceil:
685       result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
686       break;
687    case nir_op_fcos:
688       result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
689       break;
690    case nir_op_fddx:
691    case nir_op_fddx_coarse:
692    case nir_op_fddx_fine:
693       result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
694       break;
695    case nir_op_fddy:
696    case nir_op_fddy_coarse:
697    case nir_op_fddy_fine:
698       result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
699       break;
700    case nir_op_fdiv:
701       result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
702                             src[0], src[1]);
703       break;
704    case nir_op_feq32:
705       result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
706       break;
707    case nir_op_fexp2:
708       result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
709       break;
710    case nir_op_ffloor:
711       result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
712       break;
713    case nir_op_ffma:
714       result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
715       break;
716    case nir_op_ffract: {
717       struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
718       LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]);
719       result = lp_build_sub(flt_bld, src[0], tmp);
720       break;
721    }
722    case nir_op_fge32:
723       result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src);
724       break;
725    case nir_op_find_lsb: {
726       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
727       result = lp_build_cttz(int_bld, src[0]);
728       if (src_bit_size[0] < 32)
729          result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
730       else if (src_bit_size[0] > 32)
731          result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
732       break;
733    }
734    case nir_op_fisfinite32:
735       result = lp_build_isfinite(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
736       break;
737    case nir_op_flog2:
738       result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
739       break;
740    case nir_op_flt:
741    case nir_op_flt32:
742       result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
743       break;
744    case nir_op_fmax:
745    case nir_op_fmin: {
746       enum gallivm_nan_behavior minmax_nan;
747       int first = 0;
748 
749       /* If one of the sources is known to be a number (i.e., not NaN), then
750        * better code can be generated by passing that information along.
751        */
752       if (is_a_number(bld_base->range_ht, instr, 1,
753                       0 /* unused num_components */,
754                       NULL /* unused swizzle */)) {
755          minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
756       } else if (is_a_number(bld_base->range_ht, instr, 0,
757                              0 /* unused num_components */,
758                              NULL /* unused swizzle */)) {
759          first = 1;
760          minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
761       } else {
762          minmax_nan = GALLIVM_NAN_RETURN_OTHER;
763       }
764 
765       if (instr->op == nir_op_fmin) {
766          result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
767                                    src[first], src[1 - first], minmax_nan);
768       } else {
769          result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
770                                    src[first], src[1 - first], minmax_nan);
771       }
772       break;
773    }
774    case nir_op_fmod: {
775       struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
776       result = lp_build_div(flt_bld, src[0], src[1]);
777       result = lp_build_floor(flt_bld, result);
778       result = lp_build_mul(flt_bld, src[1], result);
779       result = lp_build_sub(flt_bld, src[0], result);
780       break;
781    }
782    case nir_op_fmul:
783       result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
784                             src[0], src[1]);
785       break;
786    case nir_op_fneu32:
787       result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
788       break;
789    case nir_op_fneg:
790       result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
791       break;
792    case nir_op_fpow:
793       result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
794       break;
795    case nir_op_fquantize2f16:
796       result = do_quantize_to_f16(bld_base, src[0]);
797       break;
798    case nir_op_frcp:
799       result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
800       break;
801    case nir_op_fround_even:
802       if (src_bit_size[0] == 16) {
803 	 struct lp_build_context *bld = get_flt_bld(bld_base, 16);
804 	 char intrinsic[64];
805 	 lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
806 	 result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
807       } else
808 	 result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
809       break;
810    case nir_op_frsq:
811       result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
812       break;
813    case nir_op_fsat:
814       result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
815       break;
816    case nir_op_fsign:
817       result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
818       break;
819    case nir_op_fsin:
820       result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
821       break;
822    case nir_op_fsqrt:
823       result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
824       break;
825    case nir_op_ftrunc:
826       result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
827       break;
828    case nir_op_i2b32:
829       result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
830       break;
831    case nir_op_i2f16:
832       result = LLVMBuildSIToFP(builder, src[0],
833                                bld_base->half_bld.vec_type, "");
834       break;
835    case nir_op_i2f32:
836       result = lp_build_int_to_float(&bld_base->base, src[0]);
837       break;
838    case nir_op_i2f64:
839       result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
840       break;
841    case nir_op_i2i8:
842       result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
843       break;
844    case nir_op_i2i16:
845       if (src_bit_size[0] < 16)
846          result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
847       else
848          result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
849       break;
850    case nir_op_i2i32:
851       if (src_bit_size[0] < 32)
852          result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
853       else
854          result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
855       break;
856    case nir_op_i2i64:
857       result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
858       break;
859    case nir_op_iabs:
860       result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
861       break;
862    case nir_op_iadd:
863       result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
864                             src[0], src[1]);
865       break;
866    case nir_op_iand:
867       result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]),
868                             src[0], src[1]);
869       break;
870    case nir_op_idiv:
871       result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
872       break;
873    case nir_op_ieq32:
874       result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
875       break;
876    case nir_op_ige32:
877       result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src);
878       break;
879    case nir_op_ilt32:
880       result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
881       break;
882    case nir_op_imax:
883       result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
884       break;
885    case nir_op_imin:
886       result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
887       break;
888    case nir_op_imul:
889    case nir_op_imul24:
890       result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
891                             src[0], src[1]);
892       break;
893    case nir_op_imul_high: {
894       LLVMValueRef hi_bits;
895       lp_build_mul_32_lohi(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1], &hi_bits);
896       result = hi_bits;
897       break;
898    }
899    case nir_op_ine32:
900       result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src);
901       break;
902    case nir_op_ineg:
903       result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
904       break;
905    case nir_op_inot:
906       result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
907       break;
908    case nir_op_ior:
909       result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
910                            src[0], src[1]);
911       break;
912    case nir_op_imod:
913    case nir_op_irem:
914       result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
915       break;
916    case nir_op_ishl: {
917       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
918       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
919       if (src_bit_size[0] == 64)
920          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
921       if (src_bit_size[0] < 32)
922          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
923       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
924       result = lp_build_shl(int_bld, src[0], src[1]);
925       break;
926    }
927    case nir_op_ishr: {
928       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
929       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
930       if (src_bit_size[0] == 64)
931          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
932       if (src_bit_size[0] < 32)
933          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
934       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
935       result = lp_build_shr(int_bld, src[0], src[1]);
936       break;
937    }
938    case nir_op_isign:
939       result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
940       break;
941    case nir_op_isub:
942       result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
943                             src[0], src[1]);
944       break;
945    case nir_op_ixor:
946       result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
947                             src[0], src[1]);
948       break;
949    case nir_op_mov:
950       result = src[0];
951       break;
952    case nir_op_unpack_64_2x32_split_x:
953       result = split_64bit(bld_base, src[0], false);
954       break;
955    case nir_op_unpack_64_2x32_split_y:
956       result = split_64bit(bld_base, src[0], true);
957       break;
958 
959    case nir_op_pack_32_2x16_split: {
960       LLVMValueRef tmp = merge_16bit(bld_base, src[0], src[1]);
961       result = LLVMBuildBitCast(builder, tmp, bld_base->base.vec_type, "");
962       break;
963    }
964    case nir_op_unpack_32_2x16_split_x:
965       result = split_16bit(bld_base, src[0], false);
966       break;
967    case nir_op_unpack_32_2x16_split_y:
968       result = split_16bit(bld_base, src[0], true);
969       break;
970    case nir_op_pack_64_2x32_split: {
971       LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]);
972       result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, "");
973       break;
974    }
975    case nir_op_u2f16:
976       result = LLVMBuildUIToFP(builder, src[0],
977                                bld_base->half_bld.vec_type, "");
978       break;
979    case nir_op_u2f32:
980       result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
981       break;
982    case nir_op_u2f64:
983       result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
984       break;
985    case nir_op_u2u8:
986       result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
987       break;
988    case nir_op_u2u16:
989       if (src_bit_size[0] < 16)
990          result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
991       else
992          result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
993       break;
994    case nir_op_u2u32:
995       if (src_bit_size[0] < 32)
996          result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
997       else
998          result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
999       break;
1000    case nir_op_u2u64:
1001       result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
1002       break;
1003    case nir_op_udiv:
1004       result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
1005       break;
1006    case nir_op_ufind_msb: {
1007       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1008       result = lp_build_ctlz(uint_bld, src[0]);
1009       result = lp_build_sub(uint_bld, lp_build_const_int_vec(gallivm, uint_bld->type, src_bit_size[0] - 1), result);
1010       if (src_bit_size[0] < 32)
1011          result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
1012       else
1013          result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
1014       break;
1015    }
1016    case nir_op_uge32:
1017       result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src);
1018       break;
1019    case nir_op_ult32:
1020       result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
1021       break;
1022    case nir_op_umax:
1023       result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
1024       break;
1025    case nir_op_umin:
1026       result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
1027       break;
1028    case nir_op_umod:
1029       result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
1030       break;
1031    case nir_op_umul_high: {
1032       LLVMValueRef hi_bits;
1033       lp_build_mul_32_lohi(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1], &hi_bits);
1034       result = hi_bits;
1035       break;
1036    }
1037    case nir_op_ushr: {
1038       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1039       if (src_bit_size[0] == 64)
1040          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
1041       if (src_bit_size[0] < 32)
1042          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
1043       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
1044       result = lp_build_shr(uint_bld, src[0], src[1]);
1045       break;
1046    }
1047    default:
1048       assert(0);
1049       break;
1050    }
1051    return result;
1052 }
1053 
/* Emit LLVM IR for a single NIR ALU instruction.
 *
 * First decides how many components each source operand carries: most ops
 * are scalarized per destination component, but vecN construction consumes
 * scalars, pack_half_2x16 consumes 2, the AMD cube ops consume 3, and
 * fsumN consumes its declared input width.  Sources are then fetched and
 * tagged with their bit sizes, and one of three emission paths is taken:
 *   - vecN: cast each source and collect them directly as the result;
 *   - fsumN: horizontally add the casted channels of src[0];
 *   - default: loop over destination components, extract the per-channel
 *     values, and delegate the scalar op to do_alu_action(), casting the
 *     result to the NIR output type.
 * The per-component results are finally stored via assign_alu_dest().
 */
static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
   unsigned num_components = nir_dest_num_components(instr->dest.dest);
   unsigned src_components;
   /* How many components does each source of this op carry? */
   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord_amd:
   case nir_op_cube_face_index_amd:
      src_components = 3;
      break;
   case nir_op_fsum2:
   case nir_op_fsum3:
   case nir_op_fsum4:
      src_components = nir_op_infos[instr->op].input_sizes[0];
      break;
   default:
      src_components = num_components;
      break;
   }
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      src[i] = get_alu_src(bld_base, instr->src[i], src_components);
      src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
   }

   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2 || instr->op == nir_op_vec8 || instr->op == nir_op_vec16) {
      /* vecN: each scalar source becomes one component of the result. */
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
      }
   } else if (instr->op == nir_op_fsum4 || instr->op == nir_op_fsum3 || instr->op == nir_op_fsum2) {
      /* fsumN: horizontal add of the channels of src[0] into result[0]. */
      for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) {
         LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder,
                                                          src[0], c, "");
         temp_chan = cast_type(bld_base, temp_chan, nir_op_infos[instr->op].input_types[0], src_bit_size[0]);
         result[0] = (c == 0) ? temp_chan : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]), result[0], temp_chan);
      }
    } else {
      /* General path: scalarize per destination component. */
      for (unsigned c = 0; c < num_components; c++) {
         LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];

         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
            /* Multi-component sources are LLVM arrays; extract channel c.
             * Single-component sources are used directly. */
            if (num_components > 1) {
               src_chan[i] = LLVMBuildExtractValue(gallivm->builder,
                                                     src[i], c, "");
            } else
               src_chan[i] = src[i];
            src_chan[i] = cast_type(bld_base, src_chan[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
         }
         result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan);
         result[c] = cast_type(bld_base, result[c], nir_op_infos[instr->op].output_type, nir_dest_bit_size(instr->dest.dest));
      }
   }
   assign_alu_dest(bld_base, &instr->dest, result);
 }
1123 
visit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr)1124 static void visit_load_const(struct lp_build_nir_context *bld_base,
1125                              const nir_load_const_instr *instr)
1126 {
1127    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1128    struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1129    for (unsigned i = 0; i < instr->def.num_components; i++)
1130       result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
1131    memset(&result[instr->def.num_components], 0, NIR_MAX_VEC_COMPONENTS - instr->def.num_components);
1132    assign_ssa_dest(bld_base, &instr->def, result);
1133 }
1134 
/* Walk a NIR dereference chain down to its variable and split the access
 * into a constant slot offset plus an optional indirect (runtime) offset.
 *
 * Parameters:
 *   vs_in            - true when counting vertex-shader input slots (affects
 *                      glsl_count_attribute_slots()).
 *   vertex_index_out - if non-NULL, receives the constant per-vertex index
 *                      (GS inputs); the first array level is consumed for it.
 *   vertex_index_ref - if non-NULL, receives the per-vertex index as an
 *                      LLVM value instead (TCS/TES); takes priority over
 *                      vertex_index_out.
 *   const_out        - receives the accumulated constant slot offset.
 *   indir_out        - receives the indirect offset value, or NULL if the
 *                      whole access is constant.
 *
 * NOTE(review): when both a constant and an indirect part exist, the
 * constant is added into *indir_out AND still returned in *const_out —
 * presumably callers use only one of the two in that case; verify against
 * the load_var/store_var implementations.
 */
static void
get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
                 bool vs_in, unsigned *vertex_index_out,
                 LLVMValueRef *vertex_index_ref,
                 unsigned *const_out, LLVMValueRef *indir_out)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   nir_variable *var = nir_deref_instr_get_variable(instr);
   nir_deref_path path;
   unsigned idx_lvl = 1;   /* path.path[0] is the deref_var itself */

   nir_deref_path_init(&path, instr, NULL);

   /* The first array level indexes the vertex for per-vertex I/O. */
   if (vertex_index_out != NULL || vertex_index_ref != NULL) {
      if (vertex_index_ref) {
         *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index);
         if (vertex_index_out)
            *vertex_index_out = 0;
      } else {
         *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
      }
      ++idx_lvl;
   }

   uint32_t const_offset = 0;
   LLVMValueRef offset = NULL;

   /* Compact arrays (e.g. clip distances) index components, not slots. */
   if (var->data.compact && nir_src_is_const(instr->arr.index)) {
      assert(instr->deref_type == nir_deref_type_array);
      const_offset = nir_src_as_uint(instr->arr.index);
      goto out;
   }

   for (; path.path[idx_lvl]; ++idx_lvl) {
      const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
      if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
         unsigned index = path.path[idx_lvl]->strct.index;

         /* Sum the slot counts of all fields before the accessed one. */
         for (unsigned i = 0; i < index; i++) {
            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
            const_offset += glsl_count_attribute_slots(ft, vs_in);
         }
      } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) {
         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
           const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
         } else {
           /* Runtime index: scale by the element slot size and accumulate
            * into the indirect offset. */
           LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
           idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
           LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld, lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, size),
                                               idx_src);
           if (offset)
             offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
           else
             offset = array_off;
         }
      } else
         unreachable("Uhandled deref type in get_deref_instr_offset");
   }

out:
   nir_deref_path_finish(&path);

   if (const_offset && offset)
      offset = LLVMBuildAdd(builder, offset,
                            lp_build_const_int_vec(bld_base->base.gallivm, bld_base->uint_bld.type, const_offset),
                            "");
   *const_out = const_offset;
   *indir_out = offset;
}
1205 
1206 static void
visit_load_input(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1207 visit_load_input(struct lp_build_nir_context *bld_base,
1208                  nir_intrinsic_instr *instr,
1209                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1210 {
1211    nir_variable var = {0};
1212    var.data.location = nir_intrinsic_io_semantics(instr).location;
1213    var.data.driver_location = nir_intrinsic_base(instr);
1214    var.data.location_frac = nir_intrinsic_component(instr);
1215 
1216    unsigned nc = nir_dest_num_components(instr->dest);
1217    unsigned bit_size = nir_dest_bit_size(instr->dest);
1218 
1219    nir_src offset = *nir_get_io_offset_src(instr);
1220    bool indirect = !nir_src_is_const(offset);
1221    if (!indirect)
1222       assert(nir_src_as_uint(offset) == 0);
1223    LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1224 
1225    bld_base->load_var(bld_base, nir_var_shader_in, nc, bit_size, &var, 0, NULL, 0, indir_index, result);
1226 }
1227 
1228 static void
visit_store_output(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1229 visit_store_output(struct lp_build_nir_context *bld_base,
1230                    nir_intrinsic_instr *instr)
1231 {
1232    nir_variable var = {0};
1233    var.data.location = nir_intrinsic_io_semantics(instr).location;
1234    var.data.driver_location = nir_intrinsic_base(instr);
1235    var.data.location_frac = nir_intrinsic_component(instr);
1236 
1237    unsigned mask = nir_intrinsic_write_mask(instr);
1238 
1239    unsigned bit_size = nir_src_bit_size(instr->src[0]);
1240    LLVMValueRef src = get_src(bld_base, instr->src[0]);
1241 
1242    nir_src offset = *nir_get_io_offset_src(instr);
1243    bool indirect = !nir_src_is_const(offset);
1244    if (!indirect)
1245       assert(nir_src_as_uint(offset) == 0);
1246    LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1247 
1248    if (mask == 0x1 && LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMArrayTypeKind) {
1249       src = LLVMBuildExtractValue(bld_base->base.gallivm->builder,
1250                                   src, 0, "");
1251    }
1252 
1253    bld_base->store_var(bld_base, nir_var_shader_out, util_last_bit(mask),
1254                        bit_size, &var, mask, NULL, 0, indir_index, src);
1255 }
1256 
/* Handle a load_deref intrinsic: resolve the deref chain to a variable plus
 * constant/indirect offsets (and, for per-vertex stages, a vertex index),
 * then delegate to the backend's load_var callback.
 */
static void visit_load_var(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   assert(util_bitcount(deref->modes) == 1);
   nir_variable_mode mode = deref->modes;
   /* Fix: const_index and indir_index were read uninitialized at the
    * load_var call below whenever var == NULL (undefined behavior). */
   unsigned const_index = 0;
   LLVMValueRef indir_index = NULL;
   LLVMValueRef indir_vertex_index = NULL;
   unsigned vertex_index = 0;
   unsigned nc = nir_dest_num_components(instr->dest);
   unsigned bit_size = nir_dest_bit_size(instr->dest);
   if (var) {
      /* Which stages/modes need a vertex index, and in what form? */
      bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
         var->data.mode == nir_var_shader_in;
      bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
         var->data.mode == nir_var_shader_in;
      bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_in;
      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_out && !var->data.patch;
      bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
         var->data.mode == nir_var_shader_in && !var->data.patch;

      mode = var->data.mode;

      get_deref_offset(bld_base, deref, vs_in,
                       gs_in ? &vertex_index : NULL,
                       (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
                       &const_index, &indir_index);
   }
   bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index,
                      indir_vertex_index, const_index, indir_index, result);
}
1290 
1291 static void
visit_store_var(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1292 visit_store_var(struct lp_build_nir_context *bld_base,
1293                 nir_intrinsic_instr *instr)
1294 {
1295    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1296    nir_variable *var = nir_deref_instr_get_variable(deref);
1297    assert(util_bitcount(deref->modes) == 1);
1298    nir_variable_mode mode = deref->modes;
1299    int writemask = instr->const_index[0];
1300    unsigned bit_size = nir_src_bit_size(instr->src[1]);
1301    LLVMValueRef src = get_src(bld_base, instr->src[1]);
1302    unsigned const_index = 0;
1303    LLVMValueRef indir_index, indir_vertex_index = NULL;
1304    if (var) {
1305       bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1306          var->data.mode == nir_var_shader_out && !var->data.patch;
1307       get_deref_offset(bld_base, deref, false, NULL, tcs_out ? &indir_vertex_index : NULL,
1308                        &const_index, &indir_index);
1309    }
1310    bld_base->store_var(bld_base, mode, instr->num_components, bit_size, var, writemask, indir_vertex_index, const_index, indir_index, src);
1311 }
1312 
/* Lower a load_ubo intrinsic.  The buffer index is scalarized by taking
 * lane 0 of its vector; the (possibly divergent) byte offset and a
 * uniformity hint are passed through to the backend load_ubo callback.
 */
static void visit_load_ubo(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;

   LLVMValueRef block_index = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   bool uniform_offset = nir_src_is_dynamically_uniform(instr->src[1]);

   block_index = LLVMBuildExtractElement(builder, block_index,
                                         lp_build_const_int32(gallivm, 0), "");
   bld_base->load_ubo(bld_base,
                      nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      uniform_offset, block_index, offset, result);
}
1327 
/* Lower a load_push_constant intrinsic.  Push constants are modelled as
 * UBO binding 0, so this forwards to the load_ubo callback with a constant
 * zero buffer index.
 */
static void visit_load_push_constant(struct lp_build_nir_context *bld_base,
                                     nir_intrinsic_instr *instr,
                                     LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   bool uniform_offset = nir_src_is_dynamically_uniform(instr->src[0]);

   bld_base->load_ubo(bld_base,
                      nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      uniform_offset,
                      lp_build_const_int32(gallivm, 0),
                      offset, result);
}
1340 
1341 
/* Lower a load_ssbo intrinsic: cast the buffer index to uint32 and forward
 * index, byte offset, and destination shape to the backend load_mem.
 */
static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef buf_index =
      cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);

   bld_base->load_mem(bld_base,
                      nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      buf_index, byte_offset, result);
}
1351 
/* Lower nir_intrinsic_store_ssbo: src[0] is the value, src[1] the buffer
 * index, src[2] the byte offset; const_index[0] holds the write mask.
 */
static void visit_store_ssbo(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr)
{
   const int writemask = instr->const_index[0];
   const int num_components = nir_src_num_components(instr->src[0]);
   const int bit_size = nir_src_bit_size(instr->src[0]);
   LLVMValueRef value = get_src(bld_base, instr->src[0]);
   LLVMValueRef buffer = cast_type(bld_base, get_src(bld_base, instr->src[1]),
                                   nir_type_uint, 32);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[2]);

   bld_base->store_mem(bld_base, writemask, num_components, bit_size,
                       buffer, byte_offset, value);
}
1363 
/* Lower nir_intrinsic_get_ssbo_size: query the byte size of buffer src[0]. */
static void visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef buffer = cast_type(bld_base,
                                   get_src(bld_base, instr->src[0]),
                                   nir_type_uint, 32);

   result[0] = bld_base->get_ssbo_size(bld_base, buffer);
}
1371 
/* Lower the ssbo_atomic_* family: src[0] buffer, src[1] offset, src[2] the
 * operand, and src[3] the comparator for comp_swap only.
 */
static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   const int bit_size = nir_src_bit_size(instr->src[2]);
   const bool is_cas = instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap;
   LLVMValueRef buffer = cast_type(bld_base, get_src(bld_base, instr->src[0]),
                                   nir_type_uint, 32);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);
   LLVMValueRef operand = get_src(bld_base, instr->src[2]);
   LLVMValueRef comparator = is_cas ? get_src(bld_base, instr->src[3]) : NULL;

   bld_base->atomic_mem(bld_base, instr->intrinsic, bit_size, buffer,
                        byte_offset, operand, comparator, &result[0]);
}
1387 
/* Lower nir_intrinsic_image_deref_load: read texels from an image.
 * src[0] is the image deref, src[1] the coordinate vector, src[2] the
 * sample index for multisampled images.  Results land in result[].
 */
static void visit_load_image(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   /* Split the deref chain into a constant offset plus an optional
    * dynamic (indirect) index. */
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* 1D arrays carry the layer in coord.y; the image code expects it in
    * the slice slot (coord.z). */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];

   params.coords = coords;
   params.outdata = result;
   params.img_op = LP_IMG_LOAD;
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS)
      params.ms_index = cast_type(bld_base, get_src(bld_base, instr->src[2]), nir_type_uint, 32);
   /* With an indirect index the binding alone is the base; otherwise fold
    * the constant deref offset in directly. */
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;
   bld_base->image_op(bld_base, &params);
}
1421 
/* Lower nir_intrinsic_image_deref_store: write texels to an image.
 * src[0] is the image deref, src[1] the coordinate vector, src[2] the
 * sample index for multisampled images, src[3] the value to store.
 *
 * Fix: the original repeated the 1D_ARRAY coord fixup (coords[2] =
 * coords[1]) a second time just before the image op; coords[1] is not
 * modified in between, so the duplicate was dead code and is removed.
 */
static void visit_store_image(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   /* Split the deref chain into a constant offset plus an optional
    * dynamic (indirect) index. */
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* 1D arrays carry the layer in coord.y; move it to the slice slot. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;

   /* Reinterpret the store data as the native float vector type. */
   for (unsigned i = 0; i < 4; i++) {
      params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
      params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, "");
   }
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   params.img_op = LP_IMG_STORE;
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}
1461 
/* Lower the image_deref_atomic_* intrinsics.  src[0] is the image deref,
 * src[1] the coordinates, src[2] the sample index (MS images only),
 * src[3] the operand, and src[4] the comparator for comp_swap.
 */
static void visit_atomic_image(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_img_params params;
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   /* Split the deref chain into a constant offset plus an optional
    * dynamic (indirect) index. */
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));

   /* Map the NIR atomic to the corresponding LLVM atomicrmw opcode. */
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_atomic_add:
      params.op = LLVMAtomicRMWBinOpAdd;
      break;
   case nir_intrinsic_image_deref_atomic_exchange:
      params.op = LLVMAtomicRMWBinOpXchg;
      break;
   case nir_intrinsic_image_deref_atomic_and:
      params.op = LLVMAtomicRMWBinOpAnd;
      break;
   case nir_intrinsic_image_deref_atomic_or:
      params.op = LLVMAtomicRMWBinOpOr;
      break;
   case nir_intrinsic_image_deref_atomic_xor:
      params.op = LLVMAtomicRMWBinOpXor;
      break;
   case nir_intrinsic_image_deref_atomic_umin:
      params.op = LLVMAtomicRMWBinOpUMin;
      break;
   case nir_intrinsic_image_deref_atomic_umax:
      params.op = LLVMAtomicRMWBinOpUMax;
      break;
   case nir_intrinsic_image_deref_atomic_imin:
      params.op = LLVMAtomicRMWBinOpMin;
      break;
   case nir_intrinsic_image_deref_atomic_imax:
      params.op = LLVMAtomicRMWBinOpMax;
      break;
   default:
      /* Intentionally reachable: comp_swap takes the LP_IMG_ATOMIC_CAS
       * path below and never reads params.op. */
      break;
   }

   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* 1D arrays carry the layer in coord.y; move it to the slice slot. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
      LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
      params.indata[0] = in_val;
      params.indata2[0] = cas_val;
   } else
      params.indata[0] = in_val;

   params.outdata = result;
   params.img_op = (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}
1536 
1537 
/* Lower nir_intrinsic_image_deref_size: query the dimensions of the image
 * referenced by src[0].
 */
static void visit_image_size(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *image_deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *image_var = nir_deref_instr_get_variable(image_deref);
   const struct glsl_type *img_type = glsl_without_array(image_var->type);
   struct lp_sampler_size_query_params query = { 0 };
   unsigned base_index;
   LLVMValueRef dyn_index;

   /* Constant part of the deref plus an optional dynamic index. */
   get_deref_offset(bld_base, image_deref, false, NULL, NULL,
                    &base_index, &dyn_index);

   query.texture_unit = image_var->data.binding + (dyn_index ? 0 : base_index);
   query.texture_unit_offset = dyn_index;
   query.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(img_type),
                                       glsl_sampler_type_is_array(img_type));
   query.sizes_out = result;

   bld_base->image_size(bld_base, &query);
}
1557 
/* Lower nir_intrinsic_image_deref_samples: query the sample count of the
 * image referenced by src[0] (samples_only variant of the size query).
 */
static void visit_image_samples(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *image_deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *image_var = nir_deref_instr_get_variable(image_deref);
   const struct glsl_type *img_type = glsl_without_array(image_var->type);
   struct lp_sampler_size_query_params query = { 0 };
   unsigned base_index;
   LLVMValueRef dyn_index;

   /* Constant part of the deref plus an optional dynamic index. */
   get_deref_offset(bld_base, image_deref, false, NULL, NULL,
                    &base_index, &dyn_index);

   query.texture_unit = image_var->data.binding + (dyn_index ? 0 : base_index);
   query.texture_unit_offset = dyn_index;
   query.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(img_type),
                                       glsl_sampler_type_is_array(img_type));
   query.sizes_out = result;
   query.samples_only = true;

   bld_base->image_size(bld_base, &query);
}
1579 
/* Load from workgroup-shared memory: same path as SSBO loads but with a
 * NULL buffer index.
 */
static void visit_shared_load(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   bld_base->load_mem(bld_base,
                      nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      NULL, /* no buffer index for shared memory */
                      get_src(bld_base, instr->src[0]),
                      result);
}
1588 
/* Store to workgroup-shared memory: src[0] is the value, src[1] the byte
 * offset; const_index[1] holds the write mask.
 */
static void visit_shared_store(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   const int mask = instr->const_index[1];
   const int comps = nir_src_num_components(instr->src[0]);
   const int bits = nir_src_bit_size(instr->src[0]);
   LLVMValueRef data = get_src(bld_base, instr->src[0]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);

   bld_base->store_mem(bld_base, mask, comps, bits, NULL, byte_offset, data);
}
1599 
/* Lower the shared_atomic_* family: src[0] offset, src[1] operand, and
 * src[2] the comparator for comp_swap only.
 */
static void visit_shared_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   const int bits = nir_src_bit_size(instr->src[1]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[0]);
   LLVMValueRef operand = get_src(bld_base, instr->src[1]);
   LLVMValueRef comparator =
      (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
         ? get_src(bld_base, instr->src[2]) : NULL;

   bld_base->atomic_mem(bld_base, instr->intrinsic, bits, NULL, byte_offset,
                        operand, comparator, &result[0]);
}
1614 
/* Execution barrier; delegates entirely to the backend hook. */
static void visit_barrier(struct lp_build_nir_context *bld_base)
{
   bld_base->barrier(bld_base);
}
1619 
/* Lower discard / discard_if.  For the conditional form, src[0] is cast
 * to a 32-bit int mask; for plain discard the condition stays NULL.
 */
static void visit_discard(struct lp_build_nir_context *bld_base,
                          nir_intrinsic_instr *instr)
{
   const bool conditional = instr->intrinsic == nir_intrinsic_discard_if;
   LLVMValueRef cond = NULL;

   if (conditional) {
      cond = cast_type(bld_base, get_src(bld_base, instr->src[0]),
                       nir_type_int, 32);
   }
   bld_base->discard(bld_base, cond);
}
1630 
/* Lower nir_intrinsic_load_kernel_input: read a compute kernel argument
 * at byte offset src[0].
 */
static void visit_load_kernel_input(struct lp_build_nir_context *bld_base,
                                    nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   const bool uniform_offset = nir_src_is_dynamically_uniform(instr->src[0]);
   LLVMValueRef addr = get_src(bld_base, instr->src[0]);

   bld_base->load_kernel_arg(bld_base,
                             nir_dest_num_components(instr->dest),
                             nir_dest_bit_size(instr->dest),
                             nir_src_bit_size(instr->src[0]),
                             uniform_offset, addr, result);
}
1641 
/* Lower load_global / load_global_constant: read from the raw address
 * given in src[0].
 */
static void visit_load_global(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   bld_base->load_global(bld_base,
                         nir_dest_num_components(instr->dest),
                         nir_dest_bit_size(instr->dest),
                         nir_src_bit_size(instr->src[0]), /* address width */
                         get_src(bld_base, instr->src[0]),
                         result);
}
1650 
/* Lower nir_intrinsic_store_global: src[0] is the value, src[1] the raw
 * address; const_index[0] holds the write mask.
 */
static void visit_store_global(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   const int mask = instr->const_index[0];
   const int comps = nir_src_num_components(instr->src[0]);
   const int val_bits = nir_src_bit_size(instr->src[0]);
   const int addr_bits = nir_src_bit_size(instr->src[1]);
   LLVMValueRef data = get_src(bld_base, instr->src[0]);
   LLVMValueRef addr = get_src(bld_base, instr->src[1]);

   bld_base->store_global(bld_base, mask, comps, val_bits, addr_bits,
                          addr, data);
}
1662 
/* Lower the global_atomic_* family: src[0] address, src[1] operand, and
 * src[2] the comparator for comp_swap only.
 */
static void visit_global_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   const int addr_bits = nir_src_bit_size(instr->src[0]);
   const int val_bits = nir_src_bit_size(instr->src[1]);
   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
   LLVMValueRef operand = get_src(bld_base, instr->src[1]);
   LLVMValueRef comparator =
      (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
         ? get_src(bld_base, instr->src[2]) : NULL;

   bld_base->atomic_global(bld_base, instr->intrinsic, addr_bits,
                           val_bits, addr, operand, comparator, &result[0]);
}
1678 
/* Lower interp_deref_at_{centroid,offset,sample}: interpolate a fragment
 * input at a non-default location.  src[0] is the input deref; src[1] is
 * the 2-component offset (at_offset) or the sample index (at_sample).
 */
static void visit_interp(struct lp_build_nir_context *bld_base,
                         nir_intrinsic_instr *instr,
                         LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   unsigned num_components = nir_dest_num_components(instr->dest);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   unsigned const_index;
   LLVMValueRef indir_index;
   LLVMValueRef offsets[2] = { NULL, NULL };
   /* Split the deref chain into a constant offset plus an optional
    * dynamic (indirect) index. */
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);
   bool centroid = instr->intrinsic == nir_intrinsic_interp_deref_at_centroid;
   bool sample = false;
   if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
      /* Two float offset components (x, y). */
      for (unsigned i = 0; i < 2; i++) {
         offsets[i] = LLVMBuildExtractValue(builder, get_src(bld_base, instr->src[1]), i, "");
         offsets[i] = cast_type(bld_base, offsets[i], nir_type_float, 32);
      }
   } else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
      /* Single integer sample index in offsets[0]. */
      offsets[0] = get_src(bld_base, instr->src[1]);
      offsets[0] = cast_type(bld_base, offsets[0], nir_type_int, 32);
      sample = true;
   }
   bld_base->interp_at(bld_base, num_components, var, centroid, sample, const_index, indir_index, offsets, result);
}
1707 
/* Lower nir_intrinsic_load_scratch: read from per-invocation scratch at
 * byte offset src[0].
 */
static void visit_load_scratch(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   bld_base->load_scratch(bld_base,
                          nir_dest_num_components(instr->dest),
                          nir_dest_bit_size(instr->dest),
                          get_src(bld_base, instr->src[0]),
                          result);
}
1717 
/* Lower nir_intrinsic_store_scratch: src[0] is the value, src[1] the byte
 * offset; const_index[2] holds the write mask.
 */
static void visit_store_scratch(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr)
{
   const int mask = instr->const_index[2];
   const int comps = nir_src_num_components(instr->src[0]);
   const int bits = nir_src_bit_size(instr->src[0]);
   LLVMValueRef data = get_src(bld_base, instr->src[0]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);

   bld_base->store_scratch(bld_base, mask, comps, bits, byte_offset, data);
}
1728 
1729 
/* Central intrinsic dispatcher: routes every NIR intrinsic to its visit_*
 * helper or backend callback.  Helpers that produce a value fill result[];
 * if result[0] is set afterwards it is written to the instruction's dest.
 */
static void visit_intrinsic(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      visit_load_input(bld_base, instr, result);
      break;
   case nir_intrinsic_store_output:
      visit_store_output(bld_base, instr);
      break;
   case nir_intrinsic_load_deref:
      visit_load_var(bld_base, instr, result);
      break;
   case nir_intrinsic_store_deref:
      visit_store_var(bld_base, instr);
      break;
   case nir_intrinsic_load_ubo:
      visit_load_ubo(bld_base, instr, result);
      break;
   case nir_intrinsic_load_push_constant:
      visit_load_push_constant(bld_base, instr, result);
      break;
   case nir_intrinsic_load_ssbo:
      visit_load_ssbo(bld_base, instr, result);
      break;
   case nir_intrinsic_store_ssbo:
      visit_store_ssbo(bld_base, instr);
      break;
   case nir_intrinsic_get_ssbo_size:
      visit_get_ssbo_size(bld_base, instr, result);
      break;
   /* All system-value loads share one backend callback. */
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_view_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
      bld_base->sysval_intrin(bld_base, instr, result);
      break;
   case nir_intrinsic_load_helper_invocation:
      bld_base->helper_invocation(bld_base, &result[0]);
      break;
   case nir_intrinsic_discard_if:
   case nir_intrinsic_discard:
      visit_discard(bld_base, instr);
      break;
   case nir_intrinsic_emit_vertex:
      bld_base->emit_vertex(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_end_primitive:
      bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      visit_ssbo_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_load:
      visit_load_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_store:
      visit_store_image(bld_base, instr);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
      visit_atomic_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_size:
      visit_image_size(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_samples:
      visit_image_samples(bld_base, instr, result);
      break;
   case nir_intrinsic_load_shared:
      visit_shared_load(bld_base, instr, result);
      break;
   case nir_intrinsic_store_shared:
      visit_shared_store(bld_base, instr);
      break;
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      visit_shared_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_control_barrier:
      visit_barrier(bld_base);
      break;
   /* Memory barriers are no-ops here: llvmpipe's execution model keeps
    * memory coherent without explicit fences. */
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_tcs_patch:
      break;
   case nir_intrinsic_load_kernel_input:
      visit_load_kernel_input(bld_base, instr, result);
     break;
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
      visit_load_global(bld_base, instr, result);
      break;
   case nir_intrinsic_store_global:
      visit_store_global(bld_base, instr);
      break;
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
      visit_global_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_vote_feq:
      bld_base->vote(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_elect:
      bld_base->elect(bld_base, result);
      break;
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
      bld_base->reduce(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_ballot:
      bld_base->ballot(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, 32), instr, result);
      break;
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation: {
      LLVMValueRef src1 = NULL;

      /* read_invocation carries the lane index in src[1];
       * read_first_invocation has no second source. */
      if (instr->intrinsic == nir_intrinsic_read_invocation)
         src1 = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_int, 32);
      bld_base->read_invocation(bld_base, get_src(bld_base, instr->src[0]), nir_src_bit_size(instr->src[0]), src1, result);
      break;
   }
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
      visit_interp(bld_base, instr, result);
      break;
   case nir_intrinsic_load_scratch:
      visit_load_scratch(bld_base, instr, result);
      break;
   case nir_intrinsic_store_scratch:
      visit_store_scratch(bld_base, instr);
      break;
   default:
      fprintf(stderr, "Unsupported intrinsic: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      assert(0);
      break;
   }
   /* Only intrinsics that produced a value assign to the dest. */
   if (result[0]) {
      assign_dest(bld_base, &instr->dest, result);
   }
}
1937 
/* Lower the texture size queries: txs, query_levels and texture_samples
 * all go through one lp_sampler_size_query.
 */
static void visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct lp_sampler_size_query_params params = { 0 };
   LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   /* Pick up the optional LOD and dynamic texture-index sources. */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_lod:
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;
      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      default:
         break;
      }
   }

   params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array);
   params.texture_unit = instr->texture_index;
   params.explicit_lod = explicit_lod;
   params.is_sviewinfo = TRUE;
   params.sizes_out = sizes_out;
   params.samples_only = (instr->op == nir_texop_texture_samples);
   params.texture_unit_offset = texture_unit_offset;

   /* query_levels forces LOD 0; the size query returns the level count in
    * component 3, which is what gets assigned below in that case. */
   if (instr->op == nir_texop_query_levels)
      params.explicit_lod = bld_base->uint_bld.zero;
   bld_base->tex_size(bld_base, &params);
   assign_dest(bld_base, &instr->dest, &sizes_out[instr->op == nir_texop_query_levels ? 3 : 0]);
}
1970 
/* Classify how a texture LOD source varies across the SIMD vector:
 * scalar when dynamically uniform, otherwise per-quad in fragment
 * shaders (unless quad-LOD is disabled by perf flags) and per-element
 * everywhere else.
 */
static enum lp_sampler_lod_property lp_build_nir_lod_property(struct lp_build_nir_context *bld_base,
                                                              nir_src lod_src)
{
   if (nir_src_is_dynamically_uniform(lod_src))
      return LP_SAMPLER_LOD_SCALAR;

   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      return (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
                ? LP_SAMPLER_LOD_PER_ELEMENT
                : LP_SAMPLER_LOD_PER_QUAD;
   }

   return LP_SAMPLER_LOD_PER_ELEMENT;
}
1988 
visit_tex(struct lp_build_nir_context * bld_base,nir_tex_instr * instr)1989 static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
1990 {
1991    struct gallivm_state *gallivm = bld_base->base.gallivm;
1992    LLVMBuilderRef builder = gallivm->builder;
1993    LLVMValueRef coords[5];
1994    LLVMValueRef offsets[3] = { NULL };
1995    LLVMValueRef explicit_lod = NULL, ms_index = NULL;
1996    struct lp_sampler_params params;
1997    struct lp_derivatives derivs;
1998    unsigned sample_key = 0;
1999    nir_deref_instr *texture_deref_instr = NULL;
2000    nir_deref_instr *sampler_deref_instr = NULL;
2001    LLVMValueRef texture_unit_offset = NULL;
2002    LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
2003    unsigned lod_src = 0;
2004    LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);
2005 
2006    memset(&params, 0, sizeof(params));
2007    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2008 
2009    if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels || instr->op == nir_texop_texture_samples) {
2010       visit_txs(bld_base, instr);
2011       return;
2012    }
2013    if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
2014       sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2015    else if (instr->op == nir_texop_tg4) {
2016       sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
2017       sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
2018    } else if (instr->op == nir_texop_lod)
2019       sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;
2020    for (unsigned i = 0; i < instr->num_srcs; i++) {
2021       switch (instr->src[i].src_type) {
2022       case nir_tex_src_coord: {
2023          LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
2024          if (instr->coord_components == 1)
2025             coords[0] = coord;
2026          else {
2027             for (unsigned chan = 0; chan < instr->coord_components; ++chan)
2028                coords[chan] = LLVMBuildExtractValue(builder, coord,
2029                                                     chan, "");
2030          }
2031          for (unsigned chan = instr->coord_components; chan < 5; chan++)
2032             coords[chan] = coord_undef;
2033 
2034          break;
2035       }
2036       case nir_tex_src_texture_deref:
2037          texture_deref_instr = nir_src_as_deref(instr->src[i].src);
2038          break;
2039       case nir_tex_src_sampler_deref:
2040          sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
2041          break;
2042       case nir_tex_src_comparator:
2043          sample_key |= LP_SAMPLER_SHADOW;
2044          coords[4] = get_src(bld_base, instr->src[i].src);
2045          coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32);
2046          break;
2047       case nir_tex_src_bias:
2048          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2049          lod_src = i;
2050          explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
2051          break;
2052       case nir_tex_src_lod:
2053          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2054          lod_src = i;
2055          if (instr->op == nir_texop_txf)
2056             explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
2057          else
2058             explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
2059          break;
2060       case nir_tex_src_ddx: {
2061          int deriv_cnt = instr->coord_components;
2062          if (instr->is_array)
2063             deriv_cnt--;
2064          LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
2065          if (deriv_cnt == 1)
2066             derivs.ddx[0] = deriv_val;
2067          else
2068             for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2069                derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val,
2070                                                         chan, "");
2071          for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2072             derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan], nir_type_float, 32);
2073          break;
2074       }
2075       case nir_tex_src_ddy: {
2076          int deriv_cnt = instr->coord_components;
2077          if (instr->is_array)
2078             deriv_cnt--;
2079          LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
2080          if (deriv_cnt == 1)
2081             derivs.ddy[0] = deriv_val;
2082          else
2083             for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2084                derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val,
2085                                                         chan, "");
2086          for (unsigned chan = 0; chan < deriv_cnt; ++chan)
2087             derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan], nir_type_float, 32);
2088          break;
2089       }
2090       case nir_tex_src_offset: {
2091          int offset_cnt = instr->coord_components;
2092          if (instr->is_array)
2093             offset_cnt--;
2094          LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
2095          sample_key |= LP_SAMPLER_OFFSETS;
2096          if (offset_cnt == 1)
2097             offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
2098          else {
2099             for (unsigned chan = 0; chan < offset_cnt; ++chan) {
2100                offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
2101                                                      chan, "");
2102                offsets[chan] = cast_type(bld_base, offsets[chan], nir_type_int, 32);
2103             }
2104          }
2105          break;
2106       }
2107       case nir_tex_src_ms_index:
2108          sample_key |= LP_SAMPLER_FETCH_MS;
2109          ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
2110          break;
2111 
2112       case nir_tex_src_texture_offset:
2113          texture_unit_offset = get_src(bld_base, instr->src[i].src);
2114          break;
2115       case nir_tex_src_sampler_offset:
2116          break;
2117       default:
2118          assert(0);
2119          break;
2120       }
2121    }
2122    if (!sampler_deref_instr)
2123       sampler_deref_instr = texture_deref_instr;
2124 
2125    if (explicit_lod)
2126       lod_property = lp_build_nir_lod_property(bld_base, instr->src[lod_src].src);
2127 
2128    if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
2129        instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
2130       for (unsigned chan = 0; chan < instr->coord_components; ++chan)
2131          coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
2132    else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
2133       for (unsigned chan = 0; chan < instr->coord_components; ++chan)
2134          coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32);
2135 
2136    if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
2137       /* move layer coord for 1d arrays. */
2138       coords[2] = coords[1];
2139       coords[1] = coord_undef;
2140    }
2141 
2142    uint32_t samp_base_index = 0, tex_base_index = 0;
2143    if (!sampler_deref_instr) {
2144       int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
2145       if (samp_src_index == -1) {
2146          samp_base_index = instr->sampler_index;
2147       }
2148    }
2149    if (!texture_deref_instr) {
2150       int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
2151       if (tex_src_index == -1) {
2152          tex_base_index = instr->texture_index;
2153       }
2154    }
2155 
2156    if (instr->op == nir_texop_txd) {
2157       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2158       params.derivs = &derivs;
2159       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
2160          if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
2161             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2162          else
2163             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2164       } else
2165          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2166    }
2167 
2168    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2169    params.sample_key = sample_key;
2170    params.offsets = offsets;
2171    params.texture_index = tex_base_index;
2172    params.texture_index_offset = texture_unit_offset;
2173    params.sampler_index = samp_base_index;
2174    params.coords = coords;
2175    params.texel = texel;
2176    params.lod = explicit_lod;
2177    params.ms_index = ms_index;
2178    params.aniso_filter_table = bld_base->aniso_filter_table;
2179    bld_base->tex(bld_base, &params);
2180 
2181    if (nir_dest_bit_size(instr->dest) != 32) {
2182       assert(nir_dest_bit_size(instr->dest) == 16);
2183       LLVMTypeRef vec_type = NULL;
2184       bool is_float = false;
2185       switch (nir_alu_type_get_base_type(instr->dest_type)) {
2186       case nir_type_float:
2187          is_float = true;
2188 	 break;
2189       case nir_type_int:
2190          vec_type = bld_base->int16_bld.vec_type;
2191          break;
2192       case nir_type_uint:
2193          vec_type = bld_base->uint16_bld.vec_type;
2194          break;
2195       default:
2196          unreachable("unexpected alu type");
2197       }
2198       for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) {
2199          if (is_float) {
2200             texel[i] = lp_build_float_to_half(gallivm, texel[i]);
2201          } else {
2202             texel[i] = LLVMBuildBitCast(builder, texel[i], bld_base->int_bld.vec_type, "");
2203             texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, "");
2204          }
2205       }
2206    }
2207 
2208    assign_dest(bld_base, &instr->dest, texel);
2209 
2210 }
2211 
visit_ssa_undef(struct lp_build_nir_context * bld_base,const nir_ssa_undef_instr * instr)2212 static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
2213                             const nir_ssa_undef_instr *instr)
2214 {
2215    unsigned num_components = instr->def.num_components;
2216    LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
2217    struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size);
2218    for (unsigned i = 0; i < num_components; i++)
2219       undef[i] = LLVMGetUndef(undef_bld->vec_type);
2220    memset(&undef[num_components], 0, NIR_MAX_VEC_COMPONENTS - num_components);
2221    assign_ssa_dest(bld_base, &instr->def, undef);
2222 }
2223 
visit_jump(struct lp_build_nir_context * bld_base,const nir_jump_instr * instr)2224 static void visit_jump(struct lp_build_nir_context *bld_base,
2225                        const nir_jump_instr *instr)
2226 {
2227    switch (instr->type) {
2228    case nir_jump_break:
2229       bld_base->break_stmt(bld_base);
2230       break;
2231    case nir_jump_continue:
2232       bld_base->continue_stmt(bld_base);
2233       break;
2234    default:
2235       unreachable("Unknown jump instr\n");
2236    }
2237 }
2238 
visit_deref(struct lp_build_nir_context * bld_base,nir_deref_instr * instr)2239 static void visit_deref(struct lp_build_nir_context *bld_base,
2240                         nir_deref_instr *instr)
2241 {
2242    if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared |
2243                                         nir_var_mem_global))
2244       return;
2245    LLVMValueRef result = NULL;
2246    switch(instr->deref_type) {
2247    case nir_deref_type_var: {
2248       struct hash_entry *entry = _mesa_hash_table_search(bld_base->vars, instr->var);
2249       result = entry->data;
2250       break;
2251    }
2252    default:
2253       unreachable("Unhandled deref_instr deref type");
2254    }
2255 
2256    assign_ssa(bld_base, instr->dest.ssa.index, result);
2257 }
2258 
visit_block(struct lp_build_nir_context * bld_base,nir_block * block)2259 static void visit_block(struct lp_build_nir_context *bld_base, nir_block *block)
2260 {
2261    nir_foreach_instr(instr, block)
2262    {
2263       switch (instr->type) {
2264       case nir_instr_type_alu:
2265          visit_alu(bld_base, nir_instr_as_alu(instr));
2266          break;
2267       case nir_instr_type_load_const:
2268          visit_load_const(bld_base, nir_instr_as_load_const(instr));
2269          break;
2270       case nir_instr_type_intrinsic:
2271          visit_intrinsic(bld_base, nir_instr_as_intrinsic(instr));
2272          break;
2273       case nir_instr_type_tex:
2274          visit_tex(bld_base, nir_instr_as_tex(instr));
2275          break;
2276       case nir_instr_type_phi:
2277          assert(0);
2278          break;
2279       case nir_instr_type_ssa_undef:
2280          visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(instr));
2281          break;
2282       case nir_instr_type_jump:
2283          visit_jump(bld_base, nir_instr_as_jump(instr));
2284          break;
2285       case nir_instr_type_deref:
2286          visit_deref(bld_base, nir_instr_as_deref(instr));
2287          break;
2288       default:
2289          fprintf(stderr, "Unknown NIR instr type: ");
2290          nir_print_instr(instr, stderr);
2291          fprintf(stderr, "\n");
2292          abort();
2293       }
2294    }
2295 }
2296 
visit_if(struct lp_build_nir_context * bld_base,nir_if * if_stmt)2297 static void visit_if(struct lp_build_nir_context *bld_base, nir_if *if_stmt)
2298 {
2299    LLVMValueRef cond = get_src(bld_base, if_stmt->condition);
2300 
2301    bld_base->if_cond(bld_base, cond);
2302    visit_cf_list(bld_base, &if_stmt->then_list);
2303 
2304    if (!exec_list_is_empty(&if_stmt->else_list)) {
2305       bld_base->else_stmt(bld_base);
2306       visit_cf_list(bld_base, &if_stmt->else_list);
2307    }
2308    bld_base->endif_stmt(bld_base);
2309 }
2310 
visit_loop(struct lp_build_nir_context * bld_base,nir_loop * loop)2311 static void visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
2312 {
2313    bld_base->bgnloop(bld_base);
2314    visit_cf_list(bld_base, &loop->body);
2315    bld_base->endloop(bld_base);
2316 }
2317 
visit_cf_list(struct lp_build_nir_context * bld_base,struct exec_list * list)2318 static void visit_cf_list(struct lp_build_nir_context *bld_base,
2319                           struct exec_list *list)
2320 {
2321    foreach_list_typed(nir_cf_node, node, node, list)
2322    {
2323       switch (node->type) {
2324       case nir_cf_node_block:
2325          visit_block(bld_base, nir_cf_node_as_block(node));
2326          break;
2327 
2328       case nir_cf_node_if:
2329          visit_if(bld_base, nir_cf_node_as_if(node));
2330          break;
2331 
2332       case nir_cf_node_loop:
2333          visit_loop(bld_base, nir_cf_node_as_loop(node));
2334          break;
2335 
2336       default:
2337          assert(0);
2338       }
2339    }
2340 }
2341 
/* Forward a shader output variable declaration to the backend.  The
 * nir parameter is unused here but kept for a uniform helper shape. */
static void
handle_shader_output_decl(struct lp_build_nir_context *bld_base,
                          struct nir_shader *nir,
                          struct nir_variable *variable)
{
   bld_base->emit_var_decl(bld_base, variable);
}
2349 
2350 /* vector registers are stored as arrays in LLVM side,
2351    so we can use GEP on them, as to do exec mask stores
2352    we need to operate on a single components.
2353    arrays are:
2354    0.x, 1.x, 2.x, 3.x
2355    0.y, 1.y, 2.y, 3.y
2356    ....
2357 */
get_register_type(struct lp_build_nir_context * bld_base,nir_register * reg)2358 static LLVMTypeRef get_register_type(struct lp_build_nir_context *bld_base,
2359                                      nir_register *reg)
2360 {
2361    struct lp_build_context *int_bld = get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size);
2362 
2363    LLVMTypeRef type = int_bld->vec_type;
2364    if (reg->num_array_elems)
2365       type = LLVMArrayType(type, reg->num_array_elems);
2366    if (reg->num_components > 1)
2367       type = LLVMArrayType(type, reg->num_components);
2368 
2369    return type;
2370 }
2371 
2372 
lp_build_nir_llvm(struct lp_build_nir_context * bld_base,struct nir_shader * nir)2373 bool lp_build_nir_llvm(
2374    struct lp_build_nir_context *bld_base,
2375    struct nir_shader *nir)
2376 {
2377    struct nir_function *func;
2378 
2379    nir_convert_from_ssa(nir, true);
2380    nir_lower_locals_to_regs(nir);
2381    nir_remove_dead_derefs(nir);
2382    nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
2383 
2384    nir_foreach_shader_out_variable(variable, nir)
2385       handle_shader_output_decl(bld_base, nir, variable);
2386 
2387    if (nir->info.io_lowered) {
2388       uint64_t outputs_written = nir->info.outputs_written;
2389 
2390       while (outputs_written) {
2391          unsigned location = u_bit_scan64(&outputs_written);
2392          nir_variable var = {0};
2393 
2394          var.type = glsl_vec4_type();
2395          var.data.mode = nir_var_shader_out;
2396          var.data.location = location;
2397          var.data.driver_location = util_bitcount64(nir->info.outputs_written &
2398                                                     BITFIELD64_MASK(location));
2399          bld_base->emit_var_decl(bld_base, &var);
2400       }
2401    }
2402 
2403    bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
2404                                             _mesa_key_pointer_equal);
2405    bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
2406                                             _mesa_key_pointer_equal);
2407    bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);
2408 
2409    func = (struct nir_function *)exec_list_get_head(&nir->functions);
2410 
2411    nir_foreach_register(reg, &func->impl->registers) {
2412       LLVMTypeRef type = get_register_type(bld_base, reg);
2413       LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm,
2414                                                type, "reg");
2415       _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
2416    }
2417    nir_index_ssa_defs(func->impl);
2418    bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
2419    visit_cf_list(bld_base, &func->impl->body);
2420 
2421    free(bld_base->ssa_defs);
2422    ralloc_free(bld_base->vars);
2423    ralloc_free(bld_base->regs);
2424    ralloc_free(bld_base->range_ht);
2425    return true;
2426 }
2427 
2428 /* do some basic opts to remove some things we don't want to see. */
lp_build_opt_nir(struct nir_shader * nir)2429 void lp_build_opt_nir(struct nir_shader *nir)
2430 {
2431    bool progress;
2432 
2433    static const struct nir_lower_tex_options lower_tex_options = {
2434       .lower_tg4_offsets = true,
2435       .lower_txp = ~0u,
2436    };
2437    NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
2438    NIR_PASS_V(nir, nir_lower_frexp);
2439 
2440    NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
2441    NIR_PASS_V(nir, nir_lower_fp16_casts);
2442    do {
2443       progress = false;
2444       NIR_PASS(progress, nir, nir_opt_constant_folding);
2445       NIR_PASS(progress, nir, nir_opt_algebraic);
2446       NIR_PASS(progress, nir, nir_lower_pack);
2447 
2448       nir_lower_tex_options options = { 0, };
2449       NIR_PASS_V(nir, nir_lower_tex, &options);
2450 
2451       const nir_lower_subgroups_options subgroups_options = {
2452 	.subgroup_size = lp_native_vector_width / 32,
2453 	.ballot_bit_size = 32,
2454         .ballot_components = 1,
2455 	.lower_to_scalar = true,
2456 	.lower_subgroup_masks = true,
2457       };
2458       NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options);
2459 
2460    } while (progress);
2461    nir_lower_bool_to_int32(nir);
2462 
2463    do {
2464       progress = false;
2465       NIR_PASS(progress, nir, nir_opt_algebraic_late);
2466       if (progress) {
2467          NIR_PASS_V(nir, nir_copy_prop);
2468          NIR_PASS_V(nir, nir_opt_dce);
2469          NIR_PASS_V(nir, nir_opt_cse);
2470       }
2471    } while (progress);
2472 }
2473