1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **************************************************************************/
25 
26 #include "lp_bld_nir.h"
27 #include "lp_bld_arit.h"
28 #include "lp_bld_bitarit.h"
29 #include "lp_bld_const.h"
30 #include "lp_bld_conv.h"
31 #include "lp_bld_gather.h"
32 #include "lp_bld_logic.h"
33 #include "lp_bld_quad.h"
34 #include "lp_bld_flow.h"
35 #include "lp_bld_intr.h"
36 #include "lp_bld_struct.h"
37 #include "lp_bld_debug.h"
38 #include "lp_bld_printf.h"
39 #include "nir_deref.h"
40 #include "nir_search_helpers.h"
41 
is_aos(const struct lp_build_nir_context * bld_base)42 static bool is_aos(const struct lp_build_nir_context *bld_base)
43 {
44    return bld_base->base.type.length == 16 && bld_base->base.type.width == 8;
45 }
46 
47 static void visit_cf_list(struct lp_build_nir_context *bld_base,
48                           struct exec_list *list);
49 
/*
 * Bitcast @val to the vector type matching the given NIR ALU type and
 * bit size.  Unknown ALU types pass through unchanged; an unsupported
 * bit size asserts in debug builds and yields NULL in release builds.
 */
static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val,
                              nir_alu_type alu_type, unsigned bit_size)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMTypeRef vec_type = NULL;

   switch (alu_type) {
   case nir_type_float:
      switch (bit_size) {
      case 16:
         vec_type = bld_base->half_bld.vec_type;
         break;
      case 32:
         vec_type = bld_base->base.vec_type;
         break;
      case 64:
         vec_type = bld_base->dbl_bld.vec_type;
         break;
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_int:
      switch (bit_size) {
      case 8:
         vec_type = bld_base->int8_bld.vec_type;
         break;
      case 16:
         vec_type = bld_base->int16_bld.vec_type;
         break;
      case 32:
         vec_type = bld_base->int_bld.vec_type;
         break;
      case 64:
         vec_type = bld_base->int64_bld.vec_type;
         break;
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint:
      switch (bit_size) {
      case 8:
         vec_type = bld_base->uint8_bld.vec_type;
         break;
      case 16:
         vec_type = bld_base->uint16_bld.vec_type;
         break;
      case 1:   /* 1-bit booleans are stored in 32-bit lanes */
      case 32:
         vec_type = bld_base->uint_bld.vec_type;
         break;
      case 64:
         vec_type = bld_base->uint64_bld.vec_type;
         break;
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint32:
      vec_type = bld_base->uint_bld.vec_type;
      break;
   default:
      /* Unhandled ALU type: return the value unconverted. */
      return val;
   }

   if (!vec_type)
      return NULL;
   return LLVMBuildBitCast(builder, val, vec_type, "");
}
106 
107 
/* Map a GLSL sampler dimension (+ arrayness) to the gallium texture target. */
static unsigned glsl_sampler_to_pipe(int sampler_dim, bool is_array)
{
   switch (sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
      return is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D;
   case GLSL_SAMPLER_DIM_2D:
      return is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      /* Subpass inputs are treated as 2D array textures. */
      return PIPE_TEXTURE_2D_ARRAY;
   case GLSL_SAMPLER_DIM_3D:
      return PIPE_TEXTURE_3D;
   case GLSL_SAMPLER_DIM_MS:
      return is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
   case GLSL_SAMPLER_DIM_CUBE:
      return is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
   case GLSL_SAMPLER_DIM_RECT:
      return PIPE_TEXTURE_RECT;
   case GLSL_SAMPLER_DIM_BUF:
   default:
      return PIPE_BUFFER;
   }
}
142 
get_ssa_src(struct lp_build_nir_context * bld_base,nir_ssa_def * ssa)143 static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa)
144 {
145    return bld_base->ssa_defs[ssa->index];
146 }
147 
148 static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src);
149 
get_reg_src(struct lp_build_nir_context * bld_base,nir_reg_src src)150 static LLVMValueRef get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src)
151 {
152    struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg);
153    LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
154    struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size);
155    LLVMValueRef indir_src = NULL;
156    if (src.indirect)
157       indir_src = get_src(bld_base, *src.indirect);
158    return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage);
159 }
160 
get_src(struct lp_build_nir_context * bld_base,nir_src src)161 static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src)
162 {
163    if (src.is_ssa)
164       return get_ssa_src(bld_base, src.ssa);
165    else
166       return get_reg_src(bld_base, src.reg);
167 }
168 
/* Record the LLVM value computed for the SSA definition at index @idx. */
static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr)
{
   bld_base->ssa_defs[idx] = ptr;
}
173 
/* Store per-component values for an SSA destination.  Scalars (and AoS
 * mode, where a whole vector lives in one packed value) are stored
 * directly; otherwise the components are gathered into an LLVM array. */
static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
                            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef value;

   if (ssa->num_components == 1 || is_aos(bld_base)) {
      value = vals[0];
   } else {
      value = lp_nir_array_build_gather_values(bld_base->base.gallivm->builder,
                                               vals, ssa->num_components);
   }
   assign_ssa(bld_base, ssa->index, value);
}
179 
/* Write masked components of @vals into a NIR register's backing storage,
 * resolving any indirect addressing first.  A zero write mask means
 * "write all four components". */
static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
                       unsigned write_mask,
                       LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   struct hash_entry *he = _mesa_hash_table_search(bld_base->regs, reg->reg);
   LLVMValueRef storage = (LLVMValueRef)he->data;
   struct lp_build_context *bld = get_int_bld(bld_base, true, reg->reg->bit_size);
   LLVMValueRef indirect = reg->indirect ? get_src(bld_base, *reg->indirect) : NULL;
   unsigned mask = write_mask ? write_mask : 0xf;

   bld_base->store_reg(bld_base, bld, reg, mask, indirect, storage, vals);
}
192 
/* Route a destination write to SSA or register storage as appropriate. */
static void assign_dest(struct lp_build_nir_context *bld_base, const nir_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (!dest->is_ssa) {
      assign_reg(bld_base, &dest->reg, 0, vals);
      return;
   }
   assign_ssa_dest(bld_base, &dest->ssa, vals);
}
200 
/* Like assign_dest(), but honors the ALU instruction's write mask for
 * register destinations. */
static void assign_alu_dest(struct lp_build_nir_context *bld_base, const nir_alu_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (!dest->dest.is_ssa) {
      assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
      return;
   }
   assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
}
208 
/*
 * Compare an integer vector against zero and produce a 32-bit boolean
 * mask (all-ones for true lanes, zero for false lanes).
 *
 * The compare yields a mask at the source bit width, so narrow sources
 * are sign-extended and 64-bit sources truncated to 32-bit lanes.
 * Fix: use "< 32" (covering 8-bit as well as 16-bit sources) to match
 * the handling in icmp32() below; the old "== 16" test left an 8-bit
 * mask unwidened.
 */
static LLVMValueRef int_to_bool32(struct lp_build_nir_context *bld_base,
                                uint32_t src_bit_size,
                                bool is_unsigned,
                                LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_compare(bld_base->base.gallivm, int_bld->type, PIPE_FUNC_NOTEQUAL, val, int_bld->zero);
   if (src_bit_size < 32)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}
223 
/* Compare a float vector against 0.0 and produce a 32-bit boolean mask
 * (all-ones / zero per lane). */
static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base,
                                  uint32_t src_bit_size,
                                  LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef mask = lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);

   /* Normalize the per-lane mask to 32-bit width. */
   switch (src_bit_size) {
   case 16:
      mask = LLVMBuildSExt(builder, mask, bld_base->int_bld.vec_type, "");
      break;
   case 64:
      mask = LLVMBuildTrunc(builder, mask, bld_base->int_bld.vec_type, "");
      break;
   default:
      break;
   }
   return mask;
}
237 
/* Float comparison producing a 32-bit boolean mask per lane. */
static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);

   /* NOTEQUAL stays unordered (NaN compares as "not equal"); all other
    * comparisons use the ordered form. */
   LLVMValueRef mask = (compare == PIPE_FUNC_NOTEQUAL)
      ? lp_build_cmp(flt_bld, compare, src[0], src[1])
      : lp_build_cmp_ordered(flt_bld, compare, src[0], src[1]);

   /* Normalize the mask to 32-bit lanes. */
   if (src_bit_size == 16)
      mask = LLVMBuildSExt(builder, mask, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      mask = LLVMBuildTrunc(builder, mask, bld_base->int_bld.vec_type, "");
   return mask;
}
257 
/* Integer comparison producing a 32-bit boolean mask per lane. */
static LLVMValueRef icmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           bool is_unsigned,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *i_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef mask = lp_build_cmp(i_bld, compare, src[0], src[1]);

   if (src_bit_size > 32)       /* 64-bit: narrow the mask */
      mask = LLVMBuildTrunc(builder, mask, bld_base->int_bld.vec_type, "");
   else if (src_bit_size < 32)  /* 8/16-bit: sign-extend the mask */
      mask = LLVMBuildSExt(builder, mask, bld_base->int_bld.vec_type, "");
   return mask;
}
273 
/*
 * Fetch an ALU source and apply its NIR swizzle so the result has exactly
 * @num_components components (stored as an LLVM array of per-channel
 * vectors, matching assign_ssa_dest()).
 */
static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base,
                                nir_alu_src src,
                                unsigned num_components)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef value = get_src(bld_base, src.src);
   bool need_swizzle = false;

   assert(value);

   /* AoS mode keeps the whole vector in one packed value; return as-is. */
   if (is_aos(bld_base))
      return value;

   unsigned src_components = nir_src_num_components(src.src);
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      if (src_components > 1 && num_components == 1) {
         /* Collapse to one channel: extract the single swizzled component. */
         value = LLVMBuildExtractValue(gallivm->builder, value,
                                       src.swizzle[0], "");
      } else if (src_components == 1 && num_components > 1) {
         /* Splat the lone channel across all requested components
          * (16 entries covers the largest possible vector). */
         LLVMValueRef values[] = {value, value, value, value, value, value, value, value, value, value, value, value, value, value, value, value};
         value = lp_nir_array_build_gather_values(builder, values, num_components);
      } else {
         /* General case: rebuild the array, picking channels per the swizzle. */
         LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
         for (unsigned i = 0; i < num_components; i++)
            arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, "");
         value = arr;
      }
   }
   /* Source modifiers are expected to have been lowered out of the NIR. */
   assert(!src.negate);
   assert(!src.abs);
   return value;
}
313 
/*
 * Convert a 32-bit boolean mask (~0 / 0 per lane) to float 1.0 / 0.0.
 *
 * ANDing the mask with the bit pattern of 1.0f yields exactly 0x3f800000
 * (1.0f) in true lanes and 0 (0.0f) in false lanes, avoiding a select.
 * The 32-bit float result is then resized to the requested bit width.
 */
static LLVMValueRef emit_b2f(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef result = LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
                                      LLVMBuildBitCast(builder, lp_build_const_vec(bld_base->base.gallivm, bld_base->base.type,
                                                                                   1.0), bld_base->int_bld.vec_type, ""),
                                      "");
   result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
   switch (bitsize) {
   case 16:
      /* Narrow to half precision. */
      result = LLVMBuildFPTrunc(builder, result, bld_base->half_bld.vec_type, "");
      break;
   case 32:
      break;
   case 64:
      /* Widen to double precision. */
      result = LLVMBuildFPExt(builder, result, bld_base->dbl_bld.vec_type, "");
      break;
   default:
      unreachable("unsupported bit size.");
   }
   return result;
}
338 
/* Convert a 32-bit boolean mask (~0 / 0 per lane) to an integer 1 / 0 of
 * the requested bit width. */
static LLVMValueRef emit_b2i(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   /* Mask the boolean down to 1/0 at 32 bits. */
   LLVMValueRef bit = LLVMBuildAnd(builder,
                                   cast_type(bld_base, src0, nir_type_int, 32),
                                   lp_build_const_int_vec(bld_base->base.gallivm,
                                                          bld_base->base.type, 1),
                                   "");
   /* Resize to the destination width. */
   if (bitsize == 8)
      return LLVMBuildTrunc(builder, bit, bld_base->int8_bld.vec_type, "");
   if (bitsize == 16)
      return LLVMBuildTrunc(builder, bit, bld_base->int16_bld.vec_type, "");
   if (bitsize == 32)
      return bit;
   if (bitsize == 64)
      return LLVMBuildZExt(builder, bit, bld_base->int64_bld.vec_type, "");
   unreachable("unsupported bit size.");
}
359 
/* 32-bit boolean select: src[0] != 0 chooses src[1], else src[2]. */
static LLVMValueRef emit_b32csel(struct lp_build_nir_context *bld_base,
                               unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
                               LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   /* Any non-zero selector bit counts as true: build a full mask from it. */
   LLVMValueRef cond = cast_type(bld_base, src[0], nir_type_int, 32);
   LLVMValueRef mask = lp_build_compare(bld_base->base.gallivm,
                                        bld_base->int_bld.type,
                                        PIPE_FUNC_NOTEQUAL,
                                        cond, bld_base->int_bld.zero);
   struct lp_build_context *val_bld = get_int_bld(bld_base, false, src_bit_size[1]);
   return lp_build_select(val_bld, mask, src[1], src[2]);
}
369 
/*
 * Extract the low (hi=false) or high (hi=true) 32-bit halves of a vector
 * of 64-bit values: bitcast to a double-length i32 vector, then shuffle
 * out every other lane.  The even/odd lane choice depends on host byte
 * order.
 */
static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Little endian: low halves sit in the even i32 lanes. */
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      /* Big endian: low halves sit in the odd i32 lanes. */
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}
395 
/*
 * Inverse of split_64bit(): interleave a low-half and a high-half i32
 * vector into one double-length i32 vector whose adjacent lane pairs form
 * 64-bit values (endian-aware; the caller bitcasts the result to the
 * 64-bit vector type).
 */
static LLVMValueRef
merge_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Little endian: low half first (input lanes), then high half
       * (input2 lanes, offset by the first operand's lane count). */
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
419 
/*
 * 16-bit analogue of split_64bit(): extract the low (hi=false) or high
 * (hi=true) 16-bit halves of each 32-bit lane by bitcasting to a
 * double-length i16 vector and shuffling out every other lane
 * (endian-aware).
 */
static LLVMValueRef split_16bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Little endian: low halves sit in the even i16 lanes. */
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}
/*
 * Inverse of split_16bit(): interleave a low-half and a high-half i16
 * vector back into one double-length i16 vector whose adjacent lane pairs
 * form 32-bit values (endian-aware; the caller bitcasts the result).
 *
 * NOTE(review): the loop bound uses int_bld's length while @len comes
 * from int16_bld — presumably both builders have the same lane count;
 * verify against the context setup.
 */
static LLVMValueRef
merge_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->int16_bld.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->int_bld.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      /* Little endian: low half first, then the matching high-half lane. */
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
468 
/*
 * Patch a signed divisor so the one overflowing case, INT_MIN / -1
 * (which raises SIGFPE on x86), never reaches the hardware divide:
 * lanes where the numerator is the most negative value AND the divisor
 * is -1 get a divisor of 1 instead.
 */
static LLVMValueRef get_signed_divisor(struct gallivm_state *gallivm,
                                       struct lp_build_context *int_bld,
                                       struct lp_build_context *mask_bld,
                                       int src_bit_size,
                                       LLVMValueRef src, LLVMValueRef divisor)
{
   LLVMBuilderRef builder = gallivm->builder;
   long long min_val;

   /* Most negative representable value at this bit width. */
   switch (src_bit_size) {
   case 8:
      min_val = INT8_MIN;
      break;
   case 16:
      min_val = INT16_MIN;
      break;
   case 64:
      min_val = INT64_MIN;
      break;
   case 32:
   default:
      min_val = INT_MIN;
      break;
   }

   /* Lane mask: numerator == INT_MIN. */
   LLVMValueRef num_is_min = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src,
                                          lp_build_const_int_vec(gallivm, int_bld->type, min_val));
   /* Lane mask: divisor == -1. */
   LLVMValueRef div_is_neg1 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor,
                                           lp_build_const_int_vec(gallivm, int_bld->type, -1));
   LLVMValueRef overflow = LLVMBuildAnd(builder, num_is_min, div_is_neg1, "");

   return lp_build_select(mask_bld, overflow, int_bld->one, divisor);
}
505 
/*
 * Lane-wise integer division that cannot trap.
 *
 * Divide-by-zero lanes are neutralized by OR-ing the all-ones zero-divisor
 * mask into the divisor (turning 0 into ~0, i.e. -1); signed divides also
 * patch INT_MIN / -1 lanes via get_signed_divisor().  Zero-divisor lanes
 * yield ~0 for unsigned (d3d10 rule) and 0 for signed results.
 */
static LLVMValueRef
do_int_divide(struct lp_build_nir_context *bld_base,
              bool is_unsigned, unsigned src_bit_size,
              LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);

   /* avoid divide by 0. Converted divisor from 0 to -1 */
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);

   LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, "");
   if (!is_unsigned) {
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_div(int_bld, src, divisor);

   if (!is_unsigned) {
      /* Signed: force zero-divisor lanes of the result to 0. */
      LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
      return LLVMBuildAnd(builder, not_div_mask, result, "");
   } else
      /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10
       * may as well do same for idiv */
      return LLVMBuildOr(builder, div_mask, result, "");
}
535 
/*
 * Lane-wise integer modulo that cannot trap, mirroring do_int_divide():
 * zero divisors become -1 via the compare mask, signed operands get the
 * INT_MIN / -1 patch, and zero-divisor lanes of the result are forced
 * to ~0.
 */
static LLVMValueRef
do_int_mod(struct lp_build_nir_context *bld_base,
           bool is_unsigned, unsigned src_bit_size,
           LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
   /* Mask of lanes with a zero divisor (all-ones there). */
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);
   LLVMValueRef divisor = LLVMBuildOr(builder,
                                      div_mask,
                                      src2, "");
   if (!is_unsigned) {
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
   /* Zero-divisor lanes return ~0. */
   return LLVMBuildOr(builder, div_mask, result, "");
}
557 
/*
 * Quantize a 32-bit float vector to half precision and back.
 *
 * After the FPTrunc/FPExt round trip, results whose magnitude is below
 * the smallest normal half-float (2^-14; 0x38800000 is that value's
 * 32-bit float bit pattern) are flushed to zero — except exact zeros,
 * which are left alone so the round-tripped value (including its sign)
 * is preserved.
 */
static LLVMValueRef
do_quantize_to_f16(struct lp_build_nir_context *bld_base,
                   LLVMValueRef src)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result, cond, cond2, temp;

   result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, "");
   result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");

   temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
   /* cond: |result| < 2^-14 (smallest normal half). */
   cond = LLVMBuildFCmp(builder, LLVMRealOGT,
                        LLVMBuildBitCast(builder, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, 0x38800000), bld_base->base.vec_type, ""),
                        temp, "");
   /* cond2: |result| != 0 — don't flush exact zeros. */
   cond2 = LLVMBuildFCmp(builder, LLVMRealONE, temp, bld_base->base.zero, "");
   cond = LLVMBuildAnd(builder, cond, cond2, "");
   result = LLVMBuildSelect(builder, cond, bld_base->base.zero, result, "");
   return result;
}
578 
do_alu_action(struct lp_build_nir_context * bld_base,const nir_alu_instr * instr,unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])579 static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
580                                   const nir_alu_instr *instr,
581                                   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
582                                   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
583 {
584    struct gallivm_state *gallivm = bld_base->base.gallivm;
585    LLVMBuilderRef builder = gallivm->builder;
586    LLVMValueRef result;
587 
588    switch (instr->op) {
589    case nir_op_b2f16:
590       result = emit_b2f(bld_base, src[0], 16);
591       break;
592    case nir_op_b2f32:
593       result = emit_b2f(bld_base, src[0], 32);
594       break;
595    case nir_op_b2f64:
596       result = emit_b2f(bld_base, src[0], 64);
597       break;
598    case nir_op_b2i8:
599       result = emit_b2i(bld_base, src[0], 8);
600       break;
601    case nir_op_b2i16:
602       result = emit_b2i(bld_base, src[0], 16);
603       break;
604    case nir_op_b2i32:
605       result = emit_b2i(bld_base, src[0], 32);
606       break;
607    case nir_op_b2i64:
608       result = emit_b2i(bld_base, src[0], 64);
609       break;
610    case nir_op_b32csel:
611       result = emit_b32csel(bld_base, src_bit_size, src);
612       break;
613    case nir_op_bit_count:
614       result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
615       if (src_bit_size[0] < 32)
616          result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, "");
617       else if (src_bit_size[0] > 32)
618          result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
619       break;
620    case nir_op_bitfield_select:
621       result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
622       break;
623    case nir_op_bitfield_reverse:
624       result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
625       break;
626    case nir_op_f2b32:
627       result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
628       break;
629    case nir_op_f2f16:
630       if (src_bit_size[0] == 64)
631          src[0] = LLVMBuildFPTrunc(builder, src[0],
632                                    bld_base->base.vec_type, "");
633       result = LLVMBuildFPTrunc(builder, src[0],
634                                 bld_base->half_bld.vec_type, "");
635       break;
636    case nir_op_f2f32:
637       if (src_bit_size[0] < 32)
638          result = LLVMBuildFPExt(builder, src[0],
639                                  bld_base->base.vec_type, "");
640       else
641          result = LLVMBuildFPTrunc(builder, src[0],
642                                    bld_base->base.vec_type, "");
643       break;
644    case nir_op_f2f64:
645       result = LLVMBuildFPExt(builder, src[0],
646                               bld_base->dbl_bld.vec_type, "");
647       break;
648    case nir_op_f2i8:
649       result = LLVMBuildFPToSI(builder,
650                                src[0],
651                                bld_base->uint8_bld.vec_type, "");
652       break;
653    case nir_op_f2i16:
654       result = LLVMBuildFPToSI(builder,
655                                src[0],
656                                bld_base->uint16_bld.vec_type, "");
657       break;
658    case nir_op_f2i32:
659       result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, "");
660       break;
661    case nir_op_f2u8:
662       result = LLVMBuildFPToUI(builder,
663                                src[0],
664                                bld_base->uint8_bld.vec_type, "");
665       break;
666    case nir_op_f2u16:
667       result = LLVMBuildFPToUI(builder,
668                                src[0],
669                                bld_base->uint16_bld.vec_type, "");
670       break;
671    case nir_op_f2u32:
672       result = LLVMBuildFPToUI(builder,
673                                src[0],
674                                bld_base->base.int_vec_type, "");
675       break;
676    case nir_op_f2i64:
677       result = LLVMBuildFPToSI(builder,
678                                src[0],
679                                bld_base->int64_bld.vec_type, "");
680       break;
681    case nir_op_f2u64:
682       result = LLVMBuildFPToUI(builder,
683                                src[0],
684                                bld_base->uint64_bld.vec_type, "");
685       break;
686    case nir_op_fabs:
687       result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
688       break;
689    case nir_op_fadd:
690       result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
691                             src[0], src[1]);
692       break;
693    case nir_op_fceil:
694       result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
695       break;
696    case nir_op_fcos:
697       result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
698       break;
699    case nir_op_fddx:
700    case nir_op_fddx_coarse:
701    case nir_op_fddx_fine:
702       result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
703       break;
704    case nir_op_fddy:
705    case nir_op_fddy_coarse:
706    case nir_op_fddy_fine:
707       result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
708       break;
709    case nir_op_fdiv:
710       result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
711                             src[0], src[1]);
712       break;
713    case nir_op_feq32:
714       result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
715       break;
716    case nir_op_fexp2:
717       result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
718       break;
719    case nir_op_ffloor:
720       result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
721       break;
722    case nir_op_ffma:
723       result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
724       break;
725    case nir_op_ffract: {
726       struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
727       LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]);
728       result = lp_build_sub(flt_bld, src[0], tmp);
729       break;
730    }
731    case nir_op_fge:
732    case nir_op_fge32:
733       result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src);
734       break;
735    case nir_op_find_lsb: {
736       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
737       result = lp_build_cttz(int_bld, src[0]);
738       if (src_bit_size[0] < 32)
739          result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
740       else if (src_bit_size[0] > 32)
741          result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
742       break;
743    }
744    case nir_op_fisfinite32:
745       unreachable("Should have been lowered in nir_opt_algebraic_late.");
746    case nir_op_flog2:
747       result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
748       break;
749    case nir_op_flt:
750    case nir_op_flt32:
751       result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
752       break;
753    case nir_op_fmax:
754    case nir_op_fmin: {
755       enum gallivm_nan_behavior minmax_nan;
756       int first = 0;
757 
758       /* If one of the sources is known to be a number (i.e., not NaN), then
759        * better code can be generated by passing that information along.
760        */
761       if (is_a_number(bld_base->range_ht, instr, 1,
762                       0 /* unused num_components */,
763                       NULL /* unused swizzle */)) {
764          minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
765       } else if (is_a_number(bld_base->range_ht, instr, 0,
766                              0 /* unused num_components */,
767                              NULL /* unused swizzle */)) {
768          first = 1;
769          minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
770       } else {
771          minmax_nan = GALLIVM_NAN_RETURN_OTHER;
772       }
773 
774       if (instr->op == nir_op_fmin) {
775          result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
776                                    src[first], src[1 - first], minmax_nan);
777       } else {
778          result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
779                                    src[first], src[1 - first], minmax_nan);
780       }
781       break;
782    }
783    case nir_op_fmod: {
784       struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
785       result = lp_build_div(flt_bld, src[0], src[1]);
786       result = lp_build_floor(flt_bld, result);
787       result = lp_build_mul(flt_bld, src[1], result);
788       result = lp_build_sub(flt_bld, src[0], result);
789       break;
790    }
791    case nir_op_fmul:
792       result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
793                             src[0], src[1]);
794       break;
795    case nir_op_fneu32:
796       result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
797       break;
798    case nir_op_fneg:
799       result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
800       break;
801    case nir_op_fpow:
802       result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
803       break;
804    case nir_op_fquantize2f16:
805       result = do_quantize_to_f16(bld_base, src[0]);
806       break;
807    case nir_op_frcp:
808       result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
809       break;
810    case nir_op_fround_even:
811       if (src_bit_size[0] == 16) {
812 	 struct lp_build_context *bld = get_flt_bld(bld_base, 16);
813 	 char intrinsic[64];
814 	 lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
815 	 result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
816       } else
817 	 result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
818       break;
819    case nir_op_frsq:
820       result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
821       break;
822    case nir_op_fsat:
823       result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
824       break;
825    case nir_op_fsign:
826       result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
827       break;
828    case nir_op_fsin:
829       result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
830       break;
831    case nir_op_fsqrt:
832       result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
833       break;
834    case nir_op_ftrunc:
835       result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
836       break;
837    case nir_op_i2b32:
838       result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
839       break;
840    case nir_op_i2f16:
841       result = LLVMBuildSIToFP(builder, src[0],
842                                bld_base->half_bld.vec_type, "");
843       break;
844    case nir_op_i2f32:
845       result = lp_build_int_to_float(&bld_base->base, src[0]);
846       break;
847    case nir_op_i2f64:
848       result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
849       break;
850    case nir_op_i2i8:
851       result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
852       break;
853    case nir_op_i2i16:
854       if (src_bit_size[0] < 16)
855          result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
856       else
857          result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
858       break;
859    case nir_op_i2i32:
860       if (src_bit_size[0] < 32)
861          result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
862       else
863          result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
864       break;
865    case nir_op_i2i64:
866       result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
867       break;
868    case nir_op_iabs:
869       result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
870       break;
871    case nir_op_iadd:
872       result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
873                             src[0], src[1]);
874       break;
875    case nir_op_iand:
876       result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]),
877                             src[0], src[1]);
878       break;
879    case nir_op_idiv:
880       result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
881       break;
882    case nir_op_ieq32:
883       result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
884       break;
885    case nir_op_ige32:
886       result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src);
887       break;
888    case nir_op_ilt32:
889       result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
890       break;
891    case nir_op_imax:
892       result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
893       break;
894    case nir_op_imin:
895       result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
896       break;
897    case nir_op_imul:
898    case nir_op_imul24:
899       result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
900                             src[0], src[1]);
901       break;
902    case nir_op_imul_high: {
903       LLVMValueRef hi_bits;
904       lp_build_mul_32_lohi(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1], &hi_bits);
905       result = hi_bits;
906       break;
907    }
908    case nir_op_ine32:
909       result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src);
910       break;
911    case nir_op_ineg:
912       result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
913       break;
914    case nir_op_inot:
915       result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
916       break;
917    case nir_op_ior:
918       result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
919                            src[0], src[1]);
920       break;
921    case nir_op_imod:
922    case nir_op_irem:
923       result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
924       break;
925    case nir_op_ishl: {
926       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
927       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
928       if (src_bit_size[0] == 64)
929          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
930       if (src_bit_size[0] < 32)
931          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
932       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
933       result = lp_build_shl(int_bld, src[0], src[1]);
934       break;
935    }
936    case nir_op_ishr: {
937       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
938       struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
939       if (src_bit_size[0] == 64)
940          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
941       if (src_bit_size[0] < 32)
942          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
943       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
944       result = lp_build_shr(int_bld, src[0], src[1]);
945       break;
946    }
947    case nir_op_isign:
948       result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
949       break;
950    case nir_op_isub:
951       result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
952                             src[0], src[1]);
953       break;
954    case nir_op_ixor:
955       result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
956                             src[0], src[1]);
957       break;
958    case nir_op_mov:
959       result = src[0];
960       break;
961    case nir_op_unpack_64_2x32_split_x:
962       result = split_64bit(bld_base, src[0], false);
963       break;
964    case nir_op_unpack_64_2x32_split_y:
965       result = split_64bit(bld_base, src[0], true);
966       break;
967 
968    case nir_op_pack_32_2x16_split: {
969       LLVMValueRef tmp = merge_16bit(bld_base, src[0], src[1]);
970       result = LLVMBuildBitCast(builder, tmp, bld_base->base.vec_type, "");
971       break;
972    }
973    case nir_op_unpack_32_2x16_split_x:
974       result = split_16bit(bld_base, src[0], false);
975       break;
976    case nir_op_unpack_32_2x16_split_y:
977       result = split_16bit(bld_base, src[0], true);
978       break;
979    case nir_op_pack_64_2x32_split: {
980       LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]);
981       result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, "");
982       break;
983    }
984    case nir_op_pack_32_4x8_split: {
985       LLVMValueRef tmp1 = merge_16bit(bld_base, src[0], src[1]);
986       LLVMValueRef tmp2 = merge_16bit(bld_base, src[2], src[3]);
987       tmp1 = LLVMBuildBitCast(builder, tmp1, bld_base->uint16_bld.vec_type, "");
988       tmp2 = LLVMBuildBitCast(builder, tmp2, bld_base->uint16_bld.vec_type, "");
989       LLVMValueRef tmp = merge_16bit(bld_base, tmp1, tmp2);
990       result = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.vec_type, "");
991       break;
992    }
993    case nir_op_u2f16:
994       result = LLVMBuildUIToFP(builder, src[0],
995                                bld_base->half_bld.vec_type, "");
996       break;
997    case nir_op_u2f32:
998       result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
999       break;
1000    case nir_op_u2f64:
1001       result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
1002       break;
1003    case nir_op_u2u8:
1004       result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
1005       break;
1006    case nir_op_u2u16:
1007       if (src_bit_size[0] < 16)
1008          result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
1009       else
1010          result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
1011       break;
1012    case nir_op_u2u32:
1013       if (src_bit_size[0] < 32)
1014          result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
1015       else
1016          result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
1017       break;
1018    case nir_op_u2u64:
1019       result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
1020       break;
1021    case nir_op_udiv:
1022       result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
1023       break;
1024    case nir_op_ufind_msb: {
1025       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1026       result = lp_build_ctlz(uint_bld, src[0]);
1027       result = lp_build_sub(uint_bld, lp_build_const_int_vec(gallivm, uint_bld->type, src_bit_size[0] - 1), result);
1028       if (src_bit_size[0] < 32)
1029          result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
1030       else
1031          result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
1032       break;
1033    }
1034    case nir_op_uge32:
1035       result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src);
1036       break;
1037    case nir_op_ult32:
1038       result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
1039       break;
1040    case nir_op_umax:
1041       result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
1042       break;
1043    case nir_op_umin:
1044       result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
1045       break;
1046    case nir_op_umod:
1047       result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
1048       break;
1049    case nir_op_umul_high: {
1050       LLVMValueRef hi_bits;
1051       lp_build_mul_32_lohi(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1], &hi_bits);
1052       result = hi_bits;
1053       break;
1054    }
1055    case nir_op_ushr: {
1056       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
1057       if (src_bit_size[0] == 64)
1058          src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
1059       if (src_bit_size[0] < 32)
1060          src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
1061       src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
1062       result = lp_build_shr(uint_bld, src[0], src[1]);
1063       break;
1064    }
1065    case nir_op_bcsel: {
1066       LLVMTypeRef src1_type = LLVMTypeOf(src[1]);
1067       LLVMTypeRef src2_type = LLVMTypeOf(src[2]);
1068 
1069       if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
1070           LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
1071          src[2] = LLVMBuildIntToPtr(builder, src[2], src1_type, "");
1072       } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
1073                  LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
1074          src[1] = LLVMBuildIntToPtr(builder, src[1], src2_type, "");
1075       }
1076 
1077       for (int i = 1; i <= 2; i++) {
1078          LLVMTypeRef type = LLVMTypeOf(src[i]);
1079          if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
1080             break;
1081          src[i] = LLVMBuildBitCast(builder, src[i], get_int_bld(bld_base, true, src_bit_size[i])->vec_type, "");
1082       }
1083       return LLVMBuildSelect(builder, src[0], src[1], src[2], "");
1084    }
1085    default:
1086       assert(0);
1087       break;
1088    }
1089    return result;
1090 }
1091 
visit_alu(struct lp_build_nir_context * bld_base,const nir_alu_instr * instr)1092 static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr *instr)
1093 {
1094    struct gallivm_state *gallivm = bld_base->base.gallivm;
1095    LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
1096    unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
1097    unsigned num_components = nir_dest_num_components(instr->dest.dest);
1098    unsigned src_components;
1099    switch (instr->op) {
1100    case nir_op_vec2:
1101    case nir_op_vec3:
1102    case nir_op_vec4:
1103    case nir_op_vec8:
1104    case nir_op_vec16:
1105       src_components = 1;
1106       break;
1107    case nir_op_pack_half_2x16:
1108       src_components = 2;
1109       break;
1110    case nir_op_unpack_half_2x16:
1111       src_components = 1;
1112       break;
1113    case nir_op_cube_face_coord_amd:
1114    case nir_op_cube_face_index_amd:
1115       src_components = 3;
1116       break;
1117    case nir_op_fsum2:
1118    case nir_op_fsum3:
1119    case nir_op_fsum4:
1120       src_components = nir_op_infos[instr->op].input_sizes[0];
1121       break;
1122    default:
1123       src_components = num_components;
1124       break;
1125    }
1126    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
1127       src[i] = get_alu_src(bld_base, instr->src[i], src_components);
1128       src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
1129    }
1130 
1131    if (instr->op == nir_op_mov && is_aos(bld_base) && !instr->dest.dest.is_ssa) {
1132       for (unsigned i = 0; i < 4; i++) {
1133          if (instr->dest.write_mask & (1 << i)) {
1134             assign_reg(bld_base, &instr->dest.dest.reg, (1 << i), src);
1135          }
1136       }
1137       return;
1138    }
1139 
1140    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1141    if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2 || instr->op == nir_op_vec8 || instr->op == nir_op_vec16) {
1142       for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
1143          result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
1144       }
1145    } else if (instr->op == nir_op_fsum4 || instr->op == nir_op_fsum3 || instr->op == nir_op_fsum2) {
1146       for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) {
1147          LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder,
1148                                                           src[0], c, "");
1149          temp_chan = cast_type(bld_base, temp_chan, nir_op_infos[instr->op].input_types[0], src_bit_size[0]);
1150          result[0] = (c == 0) ? temp_chan : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]), result[0], temp_chan);
1151       }
1152     } else if (is_aos(bld_base)) {
1153       if (instr->op == nir_op_fmul) {
1154          if (LLVMIsConstant(src[0]))
1155             src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1);
1156          if (LLVMIsConstant(src[1]))
1157             src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1);
1158       }
1159       result[0] = do_alu_action(bld_base, instr, src_bit_size, src);
1160    } else {
1161       for (unsigned c = 0; c < num_components; c++) {
1162          LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];
1163 
1164          for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
1165             if (num_components > 1) {
1166                src_chan[i] = LLVMBuildExtractValue(gallivm->builder,
1167                                                      src[i], c, "");
1168             } else
1169                src_chan[i] = src[i];
1170             src_chan[i] = cast_type(bld_base, src_chan[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
1171          }
1172          result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan);
1173          result[c] = cast_type(bld_base, result[c], nir_op_infos[instr->op].output_type, nir_dest_bit_size(instr->dest.dest));
1174       }
1175    }
1176    assign_alu_dest(bld_base, &instr->dest, result);
1177  }
1178 
visit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr)1179 static void visit_load_const(struct lp_build_nir_context *bld_base,
1180                              const nir_load_const_instr *instr)
1181 {
1182    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1183    bld_base->load_const(bld_base, instr, result);
1184    assign_ssa_dest(bld_base, &instr->def, result);
1185 }
1186 
1187 static void
get_deref_offset(struct lp_build_nir_context * bld_base,nir_deref_instr * instr,bool vs_in,unsigned * vertex_index_out,LLVMValueRef * vertex_index_ref,unsigned * const_out,LLVMValueRef * indir_out)1188 get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
1189                  bool vs_in, unsigned *vertex_index_out,
1190                  LLVMValueRef *vertex_index_ref,
1191                  unsigned *const_out, LLVMValueRef *indir_out)
1192 {
1193    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1194    nir_variable *var = nir_deref_instr_get_variable(instr);
1195    nir_deref_path path;
1196    unsigned idx_lvl = 1;
1197 
1198    nir_deref_path_init(&path, instr, NULL);
1199 
1200    if (vertex_index_out != NULL || vertex_index_ref != NULL) {
1201       if (vertex_index_ref) {
1202          *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index);
1203          if (vertex_index_out)
1204             *vertex_index_out = 0;
1205       } else {
1206          *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
1207       }
1208       ++idx_lvl;
1209    }
1210 
1211    uint32_t const_offset = 0;
1212    LLVMValueRef offset = NULL;
1213 
1214    if (var->data.compact && nir_src_is_const(instr->arr.index)) {
1215       assert(instr->deref_type == nir_deref_type_array);
1216       const_offset = nir_src_as_uint(instr->arr.index);
1217       goto out;
1218    }
1219 
1220    for (; path.path[idx_lvl]; ++idx_lvl) {
1221       const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
1222       if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
1223          unsigned index = path.path[idx_lvl]->strct.index;
1224 
1225          for (unsigned i = 0; i < index; i++) {
1226             const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
1227             const_offset += glsl_count_attribute_slots(ft, vs_in);
1228          }
1229       } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) {
1230          unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
1231          if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
1232            const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
1233          } else {
1234            LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
1235            idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
1236            LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld, lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, size),
1237                                                idx_src);
1238            if (offset)
1239              offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
1240            else
1241              offset = array_off;
1242          }
1243       } else
1244          unreachable("Uhandled deref type in get_deref_instr_offset");
1245    }
1246 
1247 out:
1248    nir_deref_path_finish(&path);
1249 
1250    if (const_offset && offset)
1251       offset = LLVMBuildAdd(builder, offset,
1252                             lp_build_const_int_vec(bld_base->base.gallivm, bld_base->uint_bld.type, const_offset),
1253                             "");
1254    *const_out = const_offset;
1255    *indir_out = offset;
1256 }
1257 
1258 static void
visit_load_input(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1259 visit_load_input(struct lp_build_nir_context *bld_base,
1260                  nir_intrinsic_instr *instr,
1261                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1262 {
1263    nir_variable var = {0};
1264    var.data.location = nir_intrinsic_io_semantics(instr).location;
1265    var.data.driver_location = nir_intrinsic_base(instr);
1266    var.data.location_frac = nir_intrinsic_component(instr);
1267 
1268    unsigned nc = nir_dest_num_components(instr->dest);
1269    unsigned bit_size = nir_dest_bit_size(instr->dest);
1270 
1271    nir_src offset = *nir_get_io_offset_src(instr);
1272    bool indirect = !nir_src_is_const(offset);
1273    if (!indirect)
1274       assert(nir_src_as_uint(offset) == 0);
1275    LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1276 
1277    bld_base->load_var(bld_base, nir_var_shader_in, nc, bit_size, &var, 0, NULL, 0, indir_index, result);
1278 }
1279 
1280 static void
visit_store_output(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1281 visit_store_output(struct lp_build_nir_context *bld_base,
1282                    nir_intrinsic_instr *instr)
1283 {
1284    nir_variable var = {0};
1285    var.data.location = nir_intrinsic_io_semantics(instr).location;
1286    var.data.driver_location = nir_intrinsic_base(instr);
1287    var.data.location_frac = nir_intrinsic_component(instr);
1288 
1289    unsigned mask = nir_intrinsic_write_mask(instr);
1290 
1291    unsigned bit_size = nir_src_bit_size(instr->src[0]);
1292    LLVMValueRef src = get_src(bld_base, instr->src[0]);
1293 
1294    nir_src offset = *nir_get_io_offset_src(instr);
1295    bool indirect = !nir_src_is_const(offset);
1296    if (!indirect)
1297       assert(nir_src_as_uint(offset) == 0);
1298    LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;
1299 
1300    if (mask == 0x1 && LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMArrayTypeKind) {
1301       src = LLVMBuildExtractValue(bld_base->base.gallivm->builder,
1302                                   src, 0, "");
1303    }
1304 
1305    bld_base->store_var(bld_base, nir_var_shader_out, util_last_bit(mask),
1306                        bit_size, &var, mask, NULL, 0, indir_index, src);
1307 }
1308 
visit_load_var(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1309 static void visit_load_var(struct lp_build_nir_context *bld_base,
1310                            nir_intrinsic_instr *instr,
1311                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1312 {
1313    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1314    nir_variable *var = nir_deref_instr_get_variable(deref);
1315    assert(util_bitcount(deref->modes) == 1);
1316    nir_variable_mode mode = deref->modes;
1317    unsigned const_index;
1318    LLVMValueRef indir_index;
1319    LLVMValueRef indir_vertex_index = NULL;
1320    unsigned vertex_index = 0;
1321    unsigned nc = nir_dest_num_components(instr->dest);
1322    unsigned bit_size = nir_dest_bit_size(instr->dest);
1323    if (var) {
1324       bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
1325          var->data.mode == nir_var_shader_in;
1326       bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
1327          var->data.mode == nir_var_shader_in;
1328       bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1329          var->data.mode == nir_var_shader_in;
1330       bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1331          var->data.mode == nir_var_shader_out && !var->data.patch;
1332       bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
1333          var->data.mode == nir_var_shader_in && !var->data.patch;
1334 
1335       mode = var->data.mode;
1336 
1337       get_deref_offset(bld_base, deref, vs_in, gs_in ? &vertex_index : NULL, (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
1338                        &const_index, &indir_index);
1339    }
1340    bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index, indir_vertex_index, const_index, indir_index, result);
1341 }
1342 
1343 static void
visit_store_var(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr)1344 visit_store_var(struct lp_build_nir_context *bld_base,
1345                 nir_intrinsic_instr *instr)
1346 {
1347    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
1348    nir_variable *var = nir_deref_instr_get_variable(deref);
1349    assert(util_bitcount(deref->modes) == 1);
1350    nir_variable_mode mode = deref->modes;
1351    int writemask = instr->const_index[0];
1352    unsigned bit_size = nir_src_bit_size(instr->src[1]);
1353    LLVMValueRef src = get_src(bld_base, instr->src[1]);
1354    unsigned const_index = 0;
1355    LLVMValueRef indir_index, indir_vertex_index = NULL;
1356    if (var) {
1357       bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
1358          var->data.mode == nir_var_shader_out && !var->data.patch;
1359       get_deref_offset(bld_base, deref, false, NULL, tcs_out ? &indir_vertex_index : NULL,
1360                        &const_index, &indir_index);
1361    }
1362    bld_base->store_var(bld_base, mode, instr->num_components, bit_size, var, writemask, indir_vertex_index, const_index, indir_index, src);
1363 }
1364 
visit_load_ubo(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1365 static void visit_load_ubo(struct lp_build_nir_context *bld_base,
1366                            nir_intrinsic_instr *instr,
1367                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1368 {
1369    struct gallivm_state *gallivm = bld_base->base.gallivm;
1370    LLVMBuilderRef builder = gallivm->builder;
1371    LLVMValueRef idx = get_src(bld_base, instr->src[0]);
1372    LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1373 
1374    bool offset_is_uniform = nir_src_is_always_uniform(instr->src[1]);
1375    idx = LLVMBuildExtractElement(builder, idx, lp_build_const_int32(gallivm, 0), "");
1376    bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1377                       offset_is_uniform, idx, offset, result);
1378 }
1379 
/* Lower a NIR load_push_constant intrinsic: push constants are serviced by
 * the same backend hook as UBOs, using a fixed buffer index of 0.
 */
static void visit_load_push_constant(struct lp_build_nir_context *bld_base,
                                     nir_intrinsic_instr *instr,
                                     LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[0]);
   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
   /* Push constants always live in "UBO" slot 0. */
   LLVMValueRef buffer_index = lp_build_const_int32(gallivm, 0);

   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      offset_is_uniform, buffer_index, byte_offset, result);
}
1392 
1393 
/* Lower a NIR load_ssbo intrinsic.  src[0] is the SSBO binding index,
 * src[1] the byte offset.
 */
static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef buffer_index =
      cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);

   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      buffer_index, byte_offset, result);
}
1403 
/* Lower a NIR store_ssbo intrinsic.  src[0] is the value, src[1] the SSBO
 * binding index, src[2] the byte offset; the writemask is const_index[0].
 */
static void visit_store_ssbo(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr)
{
   LLVMValueRef value = get_src(bld_base, instr->src[0]);
   LLVMValueRef buffer_index =
      cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_uint, 32);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[2]);

   int writemask = instr->const_index[0];
   int num_comps = nir_src_num_components(instr->src[0]);
   int bit_width = nir_src_bit_size(instr->src[0]);

   bld_base->store_mem(bld_base, writemask, num_comps, bit_width,
                       buffer_index, byte_offset, value);
}
1415 
/* Lower a NIR get_ssbo_size intrinsic: query the byte size of the SSBO
 * whose binding index is in src[0].
 */
static void visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef buffer_index =
      cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
   result[0] = bld_base->get_ssbo_size(bld_base, buffer_index);
}
1423 
/* Lower the ssbo_atomic_* family.  src[0] is the SSBO index, src[1] the
 * byte offset, src[2] the operand; comp_swap additionally carries the
 * new value in src[3].
 */
static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef buffer_index =
      cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);
   LLVMValueRef operand = get_src(bld_base, instr->src[2]);
   int bit_width = nir_src_bit_size(instr->src[2]);

   LLVMValueRef swap_operand = NULL;
   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
      swap_operand = get_src(bld_base, instr->src[3]);

   bld_base->atomic_mem(bld_base, instr->intrinsic, bit_width, buffer_index,
                        byte_offset, operand, swap_operand, &result[0]);
}
1439 
/* Lower a NIR image_deref_load intrinsic to a gallivm image op.
 *
 * src[0] is the image deref, src[1] the coordinate vector and src[2] the
 * sample index (only consumed for multisampled dims).  Loaded texels are
 * written to result[].
 */
static void visit_load_image(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   /* Resolve constant/indirect indexing of image array derefs. */
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* For 1D arrays NIR puts the layer in component 1; the image op expects
    * it in coords[2]. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];

   params.coords = coords;
   params.outdata = result;
   params.img_op = LP_IMG_LOAD;
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS)
      params.ms_index = cast_type(bld_base, get_src(bld_base, instr->src[2]), nir_type_uint, 32);
   /* With an indirect index the binding is the array base; otherwise fold
    * the constant array offset into the image index directly. */
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;
   bld_base->image_op(bld_base, &params);
}
1473 
/* Lower a NIR image_deref_store intrinsic to a gallivm image op.
 *
 * src[0] is the image deref, src[1] the coordinate vector, src[2] the
 * sample index (multisampled targets only) and src[3] the texel value.
 */
static void visit_store_image(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   /* Resolve constant/indirect indexing of image array derefs. */
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* For 1D arrays NIR puts the layer in component 1; the image op expects
    * it in coords[2].  (The original code repeated this copy a second time
    * just before image_op; the duplicate was redundant and is removed.) */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;

   /* Reinterpret the store data as the builder's native vector type. */
   for (unsigned i = 0; i < 4; i++) {
      params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
      params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, "");
   }
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   params.img_op = LP_IMG_STORE;
   /* With an indirect index the binding is the array base; otherwise fold
    * the constant array offset into the image index directly. */
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}
1513 
/* Lower the image_deref_atomic_* family to a gallivm image op.
 *
 * src[0] is the image deref, src[1] the coordinates, src[2] the sample
 * index (multisampled targets only), src[3] the atomic operand and, for
 * comp_swap, src[4] the comparison value.  The pre-op value is returned
 * in result[].
 */
static void visit_atomic_image(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_img_params params;
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   /* Resolve constant/indirect indexing of image array derefs. */
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));

   /* Map the NIR atomic intrinsic onto the matching LLVM atomicrmw op.
    * comp_swap falls through to default; it is handled via img_op below. */
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_atomic_add:
      params.op = LLVMAtomicRMWBinOpAdd;
      break;
   case nir_intrinsic_image_deref_atomic_exchange:
      params.op = LLVMAtomicRMWBinOpXchg;
      break;
   case nir_intrinsic_image_deref_atomic_and:
      params.op = LLVMAtomicRMWBinOpAnd;
      break;
   case nir_intrinsic_image_deref_atomic_or:
      params.op = LLVMAtomicRMWBinOpOr;
      break;
   case nir_intrinsic_image_deref_atomic_xor:
      params.op = LLVMAtomicRMWBinOpXor;
      break;
   case nir_intrinsic_image_deref_atomic_umin:
      params.op = LLVMAtomicRMWBinOpUMin;
      break;
   case nir_intrinsic_image_deref_atomic_umax:
      params.op = LLVMAtomicRMWBinOpUMax;
      break;
   case nir_intrinsic_image_deref_atomic_imin:
      params.op = LLVMAtomicRMWBinOpMin;
      break;
   case nir_intrinsic_image_deref_atomic_imax:
      params.op = LLVMAtomicRMWBinOpMax;
      break;
   default:
      break;
   }

   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   /* For 1D arrays NIR puts the layer in component 1; the image op expects
    * it in coords[2]. */
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
      LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
      params.indata[0] = in_val;
      params.indata2[0] = cas_val;
   } else
      params.indata[0] = in_val;

   params.outdata = result;
   params.img_op = (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
   /* With an indirect index the binding is the array base; otherwise fold
    * the constant array offset into the image index directly. */
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}
1588 
1589 
/* Lower a NIR image_deref_size intrinsic: query the dimensions of the
 * image referenced by src[0] into result[].
 */
static void visit_image_size(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   const struct glsl_type *img_type = glsl_without_array(var->type);

   /* Resolve constant/indirect indexing of image array derefs. */
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   struct lp_sampler_size_query_params params = { 0 };
   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
   params.texture_unit_offset = indir_index;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(img_type),
                                        glsl_sampler_type_is_array(img_type));
   params.sizes_out = result;

   bld_base->image_size(bld_base, &params);
}
1609 
/* Lower a NIR image_deref_samples intrinsic: query only the sample count
 * of the image referenced by src[0].
 */
static void visit_image_samples(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   const struct glsl_type *img_type = glsl_without_array(var->type);

   /* Resolve constant/indirect indexing of image array derefs. */
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   struct lp_sampler_size_query_params params = { 0 };
   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
   params.texture_unit_offset = indir_index;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(img_type),
                                        glsl_sampler_type_is_array(img_type));
   params.sizes_out = result;
   /* Same backend query as image_size, restricted to the sample count. */
   params.samples_only = true;

   bld_base->image_size(bld_base, &params);
}
1631 
/* Lower a NIR load_shared intrinsic: read from workgroup-shared memory at
 * the byte offset in src[0].
 */
static void visit_shared_load(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[0]);
   /* A NULL buffer index routes load_mem to shared memory (see the SSBO
    * variant, which passes a real index). */
   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      NULL, byte_offset, result);
}
1640 
/* Lower a NIR store_shared intrinsic: write src[0] into workgroup-shared
 * memory at the byte offset in src[1].
 */
static void visit_shared_store(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   LLVMValueRef value = get_src(bld_base, instr->src[0]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);
   /* NOTE: store_shared keeps its writemask in const_index[1], unlike
    * store_ssbo which uses const_index[0]. */
   int writemask = instr->const_index[1];
   int num_comps = nir_src_num_components(instr->src[0]);
   int bit_width = nir_src_bit_size(instr->src[0]);
   bld_base->store_mem(bld_base, writemask, num_comps, bit_width,
                       NULL, byte_offset, value);
}
1651 
/* Lower the shared_atomic_* family.  src[0] is the byte offset, src[1]
 * the operand; comp_swap additionally carries the new value in src[2].
 */
static void visit_shared_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[0]);
   LLVMValueRef operand = get_src(bld_base, instr->src[1]);
   int bit_width = nir_src_bit_size(instr->src[1]);

   LLVMValueRef swap_operand = NULL;
   if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
      swap_operand = get_src(bld_base, instr->src[2]);

   /* NULL buffer index selects the shared-memory path of atomic_mem. */
   bld_base->atomic_mem(bld_base, instr->intrinsic, bit_width, NULL,
                        byte_offset, operand, swap_operand, &result[0]);
}
1666 
/* Lower an execution barrier: delegate entirely to the backend hook. */
static void visit_barrier(struct lp_build_nir_context *bld_base)
{
   bld_base->barrier(bld_base);
}
1671 
/* Lower discard/discard_if.  discard_if carries a per-lane condition in
 * src[0]; a NULL condition means an unconditional discard.
 */
static void visit_discard(struct lp_build_nir_context *bld_base,
                          nir_intrinsic_instr *instr)
{
   LLVMValueRef condition = NULL;
   if (instr->intrinsic == nir_intrinsic_discard_if) {
      condition = cast_type(bld_base,
                            get_src(bld_base, instr->src[0]),
                            nir_type_int, 32);
   }
   bld_base->discard(bld_base, condition);
}
1682 
/* Lower a NIR load_kernel_input intrinsic (OpenCL kernel arguments).
 * src[0] is the byte offset into the argument buffer.
 */
static void visit_load_kernel_input(struct lp_build_nir_context *bld_base,
                                    nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[0]);
   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);

   bld_base->load_kernel_arg(bld_base,
                             nir_dest_num_components(instr->dest),
                             nir_dest_bit_size(instr->dest),
                             nir_src_bit_size(instr->src[0]),
                             offset_is_uniform, byte_offset, result);
}
1693 
/* Lower load_global / load_global_constant: read from a raw global
 * address held in src[0].
 */
static void visit_load_global(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef address = get_src(bld_base, instr->src[0]);
   bld_base->load_global(bld_base,
                         nir_dest_num_components(instr->dest),
                         nir_dest_bit_size(instr->dest),
                         nir_src_bit_size(instr->src[0]),
                         address, result);
}
1702 
/* Lower store_global: write src[0] to the raw global address in src[1].
 * The writemask is in const_index[0].
 */
static void visit_store_global(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   LLVMValueRef value = get_src(bld_base, instr->src[0]);
   LLVMValueRef address = get_src(bld_base, instr->src[1]);

   int num_comps = nir_src_num_components(instr->src[0]);
   int value_bits = nir_src_bit_size(instr->src[0]);
   int address_bits = nir_src_bit_size(instr->src[1]);
   int writemask = instr->const_index[0];

   bld_base->store_global(bld_base, writemask, num_comps, value_bits,
                          address_bits, address, value);
}
1714 
/* Lower the global_atomic_* family.  src[0] is the address, src[1] the
 * operand; comp_swap additionally carries the new value in src[2].
 */
static void visit_global_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef address = get_src(bld_base, instr->src[0]);
   LLVMValueRef operand = get_src(bld_base, instr->src[1]);
   int address_bits = nir_src_bit_size(instr->src[0]);
   int value_bits = nir_src_bit_size(instr->src[1]);

   LLVMValueRef swap_operand = NULL;
   if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
      swap_operand = get_src(bld_base, instr->src[2]);

   bld_base->atomic_global(bld_base, instr->intrinsic, address_bits,
                           value_bits, address, operand, swap_operand,
                           &result[0]);
}
1730 
/* Lower the interp_deref_at_* family (fragment-shader interpolation at
 * centroid, at a sample index, or at a pixel offset).
 *
 * src[0] is the input-variable deref; for at_offset src[1] is a float2
 * offset, for at_sample src[1] is an integer sample index.
 */
static void visit_interp(struct lp_build_nir_context *bld_base,
                         nir_intrinsic_instr *instr,
                         LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   unsigned num_components = nir_dest_num_components(instr->dest);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   unsigned const_index;
   LLVMValueRef indir_index;
   LLVMValueRef offsets[2] = { NULL, NULL };
   /* Resolve constant/indirect indexing of the input deref. */
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);
   bool centroid = instr->intrinsic == nir_intrinsic_interp_deref_at_centroid;
   bool sample = false;
   if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
      /* Unpack the float2 pixel offset into offsets[0..1]. */
      for (unsigned i = 0; i < 2; i++) {
         offsets[i] = LLVMBuildExtractValue(builder, get_src(bld_base, instr->src[1]), i, "");
         offsets[i] = cast_type(bld_base, offsets[i], nir_type_float, 32);
      }
   } else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
      /* Sample index goes into offsets[0] as an int. */
      offsets[0] = get_src(bld_base, instr->src[1]);
      offsets[0] = cast_type(bld_base, offsets[0], nir_type_int, 32);
      sample = true;
   }
   bld_base->interp_at(bld_base, num_components, var, centroid, sample, const_index, indir_index, offsets, result);
}
1759 
/* Lower a NIR load_scratch intrinsic: read from per-invocation scratch
 * memory at the byte offset in src[0].
 */
static void visit_load_scratch(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[0]);
   bld_base->load_scratch(bld_base,
                          nir_dest_num_components(instr->dest),
                          nir_dest_bit_size(instr->dest),
                          byte_offset, result);
}
1769 
/* Lower a NIR store_scratch intrinsic: write src[0] to per-invocation
 * scratch memory at the byte offset in src[1].
 */
static void visit_store_scratch(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr)
{
   LLVMValueRef value = get_src(bld_base, instr->src[0]);
   LLVMValueRef byte_offset = get_src(bld_base, instr->src[1]);
   /* NOTE: store_scratch keeps its writemask in const_index[2]. */
   int writemask = instr->const_index[2];
   int num_comps = nir_src_num_components(instr->src[0]);
   int bit_width = nir_src_bit_size(instr->src[0]);
   bld_base->store_scratch(bld_base, writemask, num_comps, bit_width,
                           byte_offset, value);
}
1780 
1781 
/* Central dispatch for NIR intrinsic instructions.
 *
 * Computes up to NIR_MAX_VEC_COMPONENTS result values into result[] via
 * the per-intrinsic visit_* helpers or backend hooks, then assigns them to
 * the instruction's destination if result[0] was produced.  Unhandled
 * intrinsics print the instruction and assert.
 */
static void visit_intrinsic(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      visit_load_input(bld_base, instr, result);
      break;
   case nir_intrinsic_store_output:
      visit_store_output(bld_base, instr);
      break;
   case nir_intrinsic_load_deref:
      visit_load_var(bld_base, instr, result);
      break;
   case nir_intrinsic_store_deref:
      visit_store_var(bld_base, instr);
      break;
   case nir_intrinsic_load_ubo:
      visit_load_ubo(bld_base, instr, result);
      break;
   case nir_intrinsic_load_push_constant:
      visit_load_push_constant(bld_base, instr, result);
      break;
   case nir_intrinsic_load_ssbo:
      visit_load_ssbo(bld_base, instr, result);
      break;
   case nir_intrinsic_store_ssbo:
      visit_store_ssbo(bld_base, instr);
      break;
   case nir_intrinsic_get_ssbo_size:
      visit_get_ssbo_size(bld_base, instr, result);
      break;
   /* All system-value loads funnel through a single backend hook. */
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_view_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
      bld_base->sysval_intrin(bld_base, instr, result);
      break;
   case nir_intrinsic_load_helper_invocation:
      bld_base->helper_invocation(bld_base, &result[0]);
      break;
   case nir_intrinsic_discard_if:
   case nir_intrinsic_discard:
      visit_discard(bld_base, instr);
      break;
   case nir_intrinsic_emit_vertex:
      bld_base->emit_vertex(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_end_primitive:
      bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      visit_ssbo_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_load:
      visit_load_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_store:
      visit_store_image(bld_base, instr);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
      visit_atomic_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_size:
      visit_image_size(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_samples:
      visit_image_samples(bld_base, instr, result);
      break;
   case nir_intrinsic_load_shared:
      visit_shared_load(bld_base, instr, result);
      break;
   case nir_intrinsic_store_shared:
      visit_shared_store(bld_base, instr);
      break;
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      visit_shared_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_scoped_barrier:
      visit_barrier(bld_base);
      break;
   /* Memory barriers are no-ops here (presumably because this CPU backend
    * executes lanes in order — confirm against the backend's memory model). */
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_tcs_patch:
      break;
   case nir_intrinsic_load_kernel_input:
      visit_load_kernel_input(bld_base, instr, result);
     break;
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
      visit_load_global(bld_base, instr, result);
      break;
   case nir_intrinsic_store_global:
      visit_store_global(bld_base, instr);
      break;
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
      visit_global_atomic(bld_base, instr, result);
      break;
   /* Subgroup operations delegate directly to backend hooks. */
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_vote_feq:
      bld_base->vote(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_elect:
      bld_base->elect(bld_base, result);
      break;
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
      bld_base->reduce(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_ballot:
      bld_base->ballot(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, 32), instr, result);
      break;
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation: {
      LLVMValueRef src1 = NULL;

      /* read_invocation carries the lane index in src[1]. */
      if (instr->intrinsic == nir_intrinsic_read_invocation)
         src1 = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_int, 32);
      bld_base->read_invocation(bld_base, get_src(bld_base, instr->src[0]), nir_src_bit_size(instr->src[0]), src1, result);
      break;
   }
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
      visit_interp(bld_base, instr, result);
      break;
   case nir_intrinsic_load_scratch:
      visit_load_scratch(bld_base, instr, result);
      break;
   case nir_intrinsic_store_scratch:
      visit_store_scratch(bld_base, instr);
      break;
   default:
      fprintf(stderr, "Unsupported intrinsic: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      assert(0);
      break;
   }
   /* Only intrinsics that produced a value write the destination. */
   if (result[0]) {
      assign_dest(bld_base, &instr->dest, result);
   }
}
1990 
/* Lower texture size/level/sample-count queries (txs, query_levels,
 * texture_samples) to the backend tex_size hook.
 *
 * For query_levels the level count is returned by the backend in channel 3
 * of sizes_out, hence the shifted assign_dest at the end.
 */
static void visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct lp_sampler_size_query_params params = { 0 };
   LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   /* Pick out the sources this query consumes; everything else is ignored. */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_lod:
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;
      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      default:
         break;
      }
   }

   params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array);
   params.texture_unit = instr->texture_index;
   params.explicit_lod = explicit_lod;
   params.is_sviewinfo = TRUE;
   params.sizes_out = sizes_out;
   params.samples_only = (instr->op == nir_texop_texture_samples);
   params.texture_unit_offset = texture_unit_offset;

   /* query_levels needs some LOD to be set; level 0 works for the count. */
   if (instr->op == nir_texop_query_levels)
      params.explicit_lod = bld_base->uint_bld.zero;
   bld_base->tex_size(bld_base, &params);
   assign_dest(bld_base, &instr->dest, &sizes_out[instr->op == nir_texop_query_levels ? 3 : 0]);
}
2023 
/* Classify how uniform an LOD source is across the SIMD vector, which
 * determines how expensively the sampler code must compute per-lane LODs. */
static enum lp_sampler_lod_property lp_build_nir_lod_property(struct lp_build_nir_context *bld_base,
                                                              nir_src lod_src)
{
   /* A provably uniform source only needs a single scalar LOD. */
   if (nir_src_is_always_uniform(lod_src))
      return LP_SAMPLER_LOD_SCALAR;

   /* Fragment shaders can share one LOD per 2x2 quad, unless that
    * optimization is disabled via GALLIVM_PERF_NO_QUAD_LOD. */
   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
      return (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
                ? LP_SAMPLER_LOD_PER_ELEMENT
                : LP_SAMPLER_LOD_PER_QUAD;

   /* Everything else gets a fully divergent per-element LOD. */
   return LP_SAMPLER_LOD_PER_ELEMENT;
}
2041 
/* Lower a NIR texturing instruction (tex/txb/txl/txd/txf/txf_ms/tg4/lod)
 * into a gallivm sampler call via bld_base->tex().  Size-style queries
 * (txs/query_levels/texture_samples) are diverted to visit_txs(). */
static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   /* coords[0..3]: position/layer channels, coords[4]: shadow comparator. */
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef explicit_lod = NULL, ms_index = NULL;
   struct lp_sampler_params params;
   struct lp_derivatives derivs;
   unsigned sample_key = 0;
   nir_deref_instr *texture_deref_instr = NULL;
   nir_deref_instr *sampler_deref_instr = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
   /* Index of the source that supplied the LOD/bias, for the uniformity check. */
   unsigned lod_src = 0;
   LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);
   /* AOS mode keeps all coordinate channels packed into a single value. */
   unsigned coord_vals = is_aos(bld_base) ? 1 : instr->coord_components;
   memset(&params, 0, sizeof(params));
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   /* Size/levels/samples queries use an entirely different code path. */
   if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels || instr->op == nir_texop_texture_samples) {
      visit_txs(bld_base, instr);
      return;
   }
   /* Encode the operation kind into the sample key. */
   if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
   else if (instr->op == nir_texop_tg4) {
      sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
      /* tg4 also records which component to gather. */
      sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
   } else if (instr->op == nir_texop_lod)
      sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;

   /* Gather all instruction sources into local state. */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
         if (coord_vals == 1)
            coords[0] = coord;
         else {
            /* Multi-channel coords arrive as an LLVM struct/array value. */
            for (unsigned chan = 0; chan < instr->coord_components; ++chan)
               coords[chan] = LLVMBuildExtractValue(builder, coord,
                                                    chan, "");
         }
         /* Pad the unused channels with undef. */
         for (unsigned chan = coord_vals; chan < 5; chan++)
            coords[chan] = coord_undef;

         break;
      }
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_comparator:
         /* Shadow comparison value goes in the 5th coord slot. */
         sample_key |= LP_SAMPLER_SHADOW;
         coords[4] = get_src(bld_base, instr->src[i].src);
         coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32);
         break;
      case nir_tex_src_bias:
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_lod:
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         /* txf takes an integer LOD, the filtered ops take a float one. */
         if (instr->op == nir_texop_txf)
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         else
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_ddx: {
         /* Derivatives cover the coord channels minus the array layer. */
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddx[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_ddy: {
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddy[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_offset: {
         /* Texel offsets, like derivatives, exclude the array layer. */
         int offset_cnt = instr->coord_components;
         if (instr->is_array)
            offset_cnt--;
         LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
         sample_key |= LP_SAMPLER_OFFSETS;
         if (offset_cnt == 1)
            offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
         else {
            for (unsigned chan = 0; chan < offset_cnt; ++chan) {
               offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
                                                     chan, "");
               offsets[chan] = cast_type(bld_base, offsets[chan], nir_type_int, 32);
            }
         }
         break;
      }
      case nir_tex_src_ms_index:
         sample_key |= LP_SAMPLER_FETCH_MS;
         ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;

      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      case nir_tex_src_sampler_offset:
         /* NOTE(review): sampler offsets are accepted but ignored here —
          * presumably handled (or irrelevant) elsewhere; confirm. */
         break;
      default:
         assert(0);
         break;
      }
   }
   /* With no separate sampler deref, the texture deref serves both roles. */
   if (!sampler_deref_instr)
      sampler_deref_instr = texture_deref_instr;

   if (explicit_lod)
      lod_property = lp_build_nir_lod_property(bld_base, instr->src[lod_src].src);

   /* Filtered ops want float coords; fetches want integer coords. */
   if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
       instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
      for (unsigned chan = 0; chan < coord_vals; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
   else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32);

   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
      /* move layer coord for 1d arrays. */
      coords[2] = coords[1];
      coords[1] = coord_undef;
   }

   /* Without derefs, fall back to the static texture/sampler indices
    * unless a bindless handle source is present. */
   uint32_t samp_base_index = 0, tex_base_index = 0;
   if (!sampler_deref_instr) {
      int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
      if (samp_src_index == -1) {
         samp_base_index = instr->sampler_index;
      }
   }
   if (!texture_deref_instr) {
      int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
      if (tex_src_index == -1) {
         tex_base_index = instr->texture_index;
      }
   }

   if (instr->op == nir_texop_txd) {
      /* Explicit derivatives: LOD is computed per quad (or per element
       * when the quad optimization is disabled / not a fragment shader). */
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      params.derivs = &derivs;
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         else
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
      } else
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   }

   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
   params.sample_key = sample_key;
   params.offsets = offsets;
   params.texture_index = tex_base_index;
   params.texture_index_offset = texture_unit_offset;
   params.sampler_index = samp_base_index;
   params.coords = coords;
   params.texel = texel;
   params.lod = explicit_lod;
   params.ms_index = ms_index;
   params.aniso_filter_table = bld_base->aniso_filter_table;
   bld_base->tex(bld_base, &params);

   /* The sampler returns 32-bit values; narrow to 16 bits if the
    * destination requires it. */
   if (nir_dest_bit_size(instr->dest) != 32) {
      assert(nir_dest_bit_size(instr->dest) == 16);
      LLVMTypeRef vec_type = NULL;
      bool is_float = false;
      switch (nir_alu_type_get_base_type(instr->dest_type)) {
      case nir_type_float:
         is_float = true;
         break;
      case nir_type_int:
         vec_type = bld_base->int16_bld.vec_type;
         break;
      case nir_type_uint:
         vec_type = bld_base->uint16_bld.vec_type;
         break;
      default:
         unreachable("unexpected alu type");
      }
      for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         if (is_float) {
            /* Floats get a proper f32 -> f16 conversion. */
            texel[i] = lp_build_float_to_half(gallivm, texel[i]);
         } else {
            /* Integers are bitcast to i32 lanes then truncated. */
            texel[i] = LLVMBuildBitCast(builder, texel[i], bld_base->int_bld.vec_type, "");
            texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, "");
         }
      }
   }

   assign_dest(bld_base, &instr->dest, texel);

}
2264 
visit_ssa_undef(struct lp_build_nir_context * bld_base,const nir_ssa_undef_instr * instr)2265 static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
2266                             const nir_ssa_undef_instr *instr)
2267 {
2268    unsigned num_components = instr->def.num_components;
2269    LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
2270    struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size);
2271    for (unsigned i = 0; i < num_components; i++)
2272       undef[i] = LLVMGetUndef(undef_bld->vec_type);
2273    memset(&undef[num_components], 0, NIR_MAX_VEC_COMPONENTS - num_components);
2274    assign_ssa_dest(bld_base, &instr->def, undef);
2275 }
2276 
visit_jump(struct lp_build_nir_context * bld_base,const nir_jump_instr * instr)2277 static void visit_jump(struct lp_build_nir_context *bld_base,
2278                        const nir_jump_instr *instr)
2279 {
2280    switch (instr->type) {
2281    case nir_jump_break:
2282       bld_base->break_stmt(bld_base);
2283       break;
2284    case nir_jump_continue:
2285       bld_base->continue_stmt(bld_base);
2286       break;
2287    default:
2288       unreachable("Unknown jump instr\n");
2289    }
2290 }
2291 
/* Resolve a deref chain for shared/global memory to an LLVM value and
 * record it as the deref's SSA result.  Derefs in other modes are
 * consumed directly by their users and are skipped here. */
static void visit_deref(struct lp_build_nir_context *bld_base,
                        nir_deref_instr *instr)
{
   if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared |
                                        nir_var_mem_global))
      return;

   LLVMValueRef result = NULL;
   if (instr->deref_type == nir_deref_type_var) {
      /* Variables were registered in bld_base->vars at declaration time. */
      struct hash_entry *entry = _mesa_hash_table_search(bld_base->vars, instr->var);
      result = entry->data;
   } else {
      unreachable("Unhandled deref_instr deref type");
   }

   assign_ssa(bld_base, instr->dest.ssa.index, result);
}
2311 
/* Lower every instruction in a NIR basic block, in order, dispatching
 * on the instruction type. */
static void visit_block(struct lp_build_nir_context *bld_base, nir_block *block)
{
   nir_foreach_instr(instr, block)
   {
      if (instr->type == nir_instr_type_alu) {
         visit_alu(bld_base, nir_instr_as_alu(instr));
      } else if (instr->type == nir_instr_type_load_const) {
         visit_load_const(bld_base, nir_instr_as_load_const(instr));
      } else if (instr->type == nir_instr_type_intrinsic) {
         visit_intrinsic(bld_base, nir_instr_as_intrinsic(instr));
      } else if (instr->type == nir_instr_type_tex) {
         visit_tex(bld_base, nir_instr_as_tex(instr));
      } else if (instr->type == nir_instr_type_phi) {
         /* Phis were eliminated by nir_convert_from_ssa beforehand. */
         assert(0);
      } else if (instr->type == nir_instr_type_ssa_undef) {
         visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(instr));
      } else if (instr->type == nir_instr_type_jump) {
         visit_jump(bld_base, nir_instr_as_jump(instr));
      } else if (instr->type == nir_instr_type_deref) {
         visit_deref(bld_base, nir_instr_as_deref(instr));
      } else {
         fprintf(stderr, "Unknown NIR instr type: ");
         nir_print_instr(instr, stderr);
         fprintf(stderr, "\n");
         abort();
      }
   }
}
2349 
/* Emit an if/else construct: evaluate the condition, lower the then
 * branch, then the (optional) else branch, and close the construct. */
static void visit_if(struct lp_build_nir_context *bld_base, nir_if *if_stmt)
{
   LLVMValueRef condition = get_src(bld_base, if_stmt->condition);

   bld_base->if_cond(bld_base, condition);
   visit_cf_list(bld_base, &if_stmt->then_list);

   const bool has_else = !exec_list_is_empty(&if_stmt->else_list);
   if (has_else) {
      bld_base->else_stmt(bld_base);
      visit_cf_list(bld_base, &if_stmt->else_list);
   }
   bld_base->endif_stmt(bld_base);
}
2363 
/* Emit a loop construct: open the loop, lower its body CF list, and
 * close it again via the backend callbacks. */
static void visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
{
   bld_base->bgnloop(bld_base);
   visit_cf_list(bld_base, &loop->body);
   bld_base->endloop(bld_base);
}
2370 
visit_cf_list(struct lp_build_nir_context * bld_base,struct exec_list * list)2371 static void visit_cf_list(struct lp_build_nir_context *bld_base,
2372                           struct exec_list *list)
2373 {
2374    foreach_list_typed(nir_cf_node, node, node, list)
2375    {
2376       switch (node->type) {
2377       case nir_cf_node_block:
2378          visit_block(bld_base, nir_cf_node_as_block(node));
2379          break;
2380 
2381       case nir_cf_node_if:
2382          visit_if(bld_base, nir_cf_node_as_if(node));
2383          break;
2384 
2385       case nir_cf_node_loop:
2386          visit_loop(bld_base, nir_cf_node_as_loop(node));
2387          break;
2388 
2389       default:
2390          assert(0);
2391       }
2392    }
2393 }
2394 
2395 static void
handle_shader_output_decl(struct lp_build_nir_context * bld_base,struct nir_shader * nir,struct nir_variable * variable)2396 handle_shader_output_decl(struct lp_build_nir_context *bld_base,
2397                           struct nir_shader *nir,
2398                           struct nir_variable *variable)
2399 {
2400    bld_base->emit_var_decl(bld_base, variable);
2401 }
2402 
/* Vector registers are stored as arrays on the LLVM side so we can
   use GEP on them; to do exec-mask stores we need to operate on a
   single component at a time.
   The arrays are laid out as:
   0.x, 1.x, 2.x, 3.x
   0.y, 1.y, 2.y, 3.y
   ....
*/
get_register_type(struct lp_build_nir_context * bld_base,nir_register * reg)2411 static LLVMTypeRef get_register_type(struct lp_build_nir_context *bld_base,
2412                                      nir_register *reg)
2413 {
2414    if (is_aos(bld_base))
2415       return bld_base->base.int_vec_type;
2416 
2417    struct lp_build_context *int_bld = get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size);
2418 
2419    LLVMTypeRef type = int_bld->vec_type;
2420    if (reg->num_array_elems)
2421       type = LLVMArrayType(type, reg->num_array_elems);
2422    if (reg->num_components > 1)
2423       type = LLVMArrayType(type, reg->num_components);
2424 
2425    return type;
2426 }
2427 
2428 
lp_build_nir_llvm(struct lp_build_nir_context * bld_base,struct nir_shader * nir)2429 bool lp_build_nir_llvm(
2430    struct lp_build_nir_context *bld_base,
2431    struct nir_shader *nir)
2432 {
2433    struct nir_function *func;
2434 
2435    nir_convert_from_ssa(nir, true);
2436    nir_lower_locals_to_regs(nir);
2437    nir_remove_dead_derefs(nir);
2438    nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
2439 
2440    if (is_aos(bld_base)) {
2441       nir_move_vec_src_uses_to_dest(nir);
2442       nir_lower_vec_to_movs(nir, NULL, NULL);
2443    }
2444 
2445    nir_foreach_shader_out_variable(variable, nir)
2446       handle_shader_output_decl(bld_base, nir, variable);
2447 
2448    if (nir->info.io_lowered) {
2449       uint64_t outputs_written = nir->info.outputs_written;
2450 
2451       while (outputs_written) {
2452          unsigned location = u_bit_scan64(&outputs_written);
2453          nir_variable var = {0};
2454 
2455          var.type = glsl_vec4_type();
2456          var.data.mode = nir_var_shader_out;
2457          var.data.location = location;
2458          var.data.driver_location = util_bitcount64(nir->info.outputs_written &
2459                                                     BITFIELD64_MASK(location));
2460          bld_base->emit_var_decl(bld_base, &var);
2461       }
2462    }
2463 
2464    bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
2465                                             _mesa_key_pointer_equal);
2466    bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
2467                                             _mesa_key_pointer_equal);
2468    bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);
2469 
2470    func = (struct nir_function *)exec_list_get_head(&nir->functions);
2471 
2472    nir_foreach_register(reg, &func->impl->registers) {
2473       LLVMTypeRef type = get_register_type(bld_base, reg);
2474       LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm,
2475                                                type, "reg");
2476       _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
2477    }
2478    nir_index_ssa_defs(func->impl);
2479    bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
2480    visit_cf_list(bld_base, &func->impl->body);
2481 
2482    free(bld_base->ssa_defs);
2483    ralloc_free(bld_base->vars);
2484    ralloc_free(bld_base->regs);
2485    ralloc_free(bld_base->range_ht);
2486    return true;
2487 }
2488 
2489 /* do some basic opts to remove some things we don't want to see. */
lp_build_opt_nir(struct nir_shader * nir)2490 void lp_build_opt_nir(struct nir_shader *nir)
2491 {
2492    bool progress;
2493 
2494    static const struct nir_lower_tex_options lower_tex_options = {
2495       .lower_tg4_offsets = true,
2496       .lower_txp = ~0u,
2497    };
2498    NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
2499    NIR_PASS_V(nir, nir_lower_frexp);
2500 
2501    NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
2502    NIR_PASS_V(nir, nir_lower_fp16_casts);
2503    do {
2504       progress = false;
2505       NIR_PASS(progress, nir, nir_opt_constant_folding);
2506       NIR_PASS(progress, nir, nir_opt_algebraic);
2507       NIR_PASS(progress, nir, nir_lower_pack);
2508 
2509       nir_lower_tex_options options = { 0, };
2510       NIR_PASS_V(nir, nir_lower_tex, &options);
2511 
2512       const nir_lower_subgroups_options subgroups_options = {
2513 	.subgroup_size = lp_native_vector_width / 32,
2514 	.ballot_bit_size = 32,
2515         .ballot_components = 1,
2516 	.lower_to_scalar = true,
2517 	.lower_subgroup_masks = true,
2518       };
2519       NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options);
2520 
2521    } while (progress);
2522 
2523    do {
2524       progress = false;
2525       NIR_PASS(progress, nir, nir_opt_algebraic_late);
2526       if (progress) {
2527          NIR_PASS_V(nir, nir_copy_prop);
2528          NIR_PASS_V(nir, nir_opt_dce);
2529          NIR_PASS_V(nir, nir_opt_cse);
2530       }
2531    } while (progress);
2532 
2533    if (nir_lower_bool_to_int32(nir)) {
2534       NIR_PASS_V(nir, nir_copy_prop);
2535       NIR_PASS_V(nir, nir_opt_dce);
2536    }
2537 }
2538