/**************************************************************************
 *
 * Copyright 2019 Red Hat.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **************************************************************************/

#include "lp_bld_nir.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_quad.h"
#include "lp_bld_flow.h"
#include "lp_bld_intr.h"
#include "lp_bld_struct.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "nir_deref.h"
#include "nir_search_helpers.h"

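/* Detect the AoS path: 16 x 8-bit lanes, i.e. a single 128-bit vector of
 * packed 8-bit channel data rather than one SoA vector per channel.
 */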
static bool is_aos(const struct lp_build_nir_context *bld_base)
{
   return bld_base->base.type.length == 16 && bld_base->base.type.width == 8;
}

static void visit_cf_list(struct lp_build_nir_context *bld_base,
                          struct exec_list *list);

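/* Bitcast a value to the vector type matching the given NIR alu type and
 * bit size.  All the integer/float builders share the same storage width,
 * so only the LLVM type changes; 1-bit booleans are stored as 32-bit
 * unsigned vectors.
 */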
static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val,
                              nir_alu_type alu_type, unsigned bit_size)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   switch (alu_type) {
   case nir_type_float:
      switch (bit_size) {
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->half_bld.vec_type, "");
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->dbl_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_int:
      switch (bit_size) {
      case 8:
         return LLVMBuildBitCast(builder, val, bld_base->int8_bld.vec_type, "");
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->int16_bld.vec_type, "");
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->int_bld.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->int64_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint:
      switch (bit_size) {
      case 8:
         return LLVMBuildBitCast(builder, val, bld_base->uint8_bld.vec_type, "");
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->uint16_bld.vec_type, "");
      case 1:
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->uint64_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint32:
      return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
   default:
      return val;
   }
   return NULL;
}


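/* Translate a GLSL sampler dimension into the corresponding pipe texture
 * target.  Subpass inputs are treated as 2D arrays; anything unhandled
 * falls back to PIPE_BUFFER.
 */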
static unsigned glsl_sampler_to_pipe(int sampler_dim, bool is_array)
{
   unsigned pipe_target = PIPE_BUFFER;
   switch (sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
      pipe_target = is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D;
      break;
   case GLSL_SAMPLER_DIM_2D:
      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
      break;
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      pipe_target = PIPE_TEXTURE_2D_ARRAY;
      break;
   case GLSL_SAMPLER_DIM_3D:
      pipe_target = PIPE_TEXTURE_3D;
      break;
   case GLSL_SAMPLER_DIM_MS:
      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      pipe_target = is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      pipe_target = PIPE_TEXTURE_RECT;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      pipe_target = PIPE_BUFFER;
      break;
   default:
      break;
   }
   return pipe_target;
}

static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa)
{
   return bld_base->ssa_defs[ssa->index];
}

static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src);

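/* NIR registers are backed by storage allocated up front and tracked in the
 * bld_base->regs hash table; accesses go through the context's
 * load_reg/store_reg hooks so each backend can handle indirect indexing
 * its own way.
 */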
static LLVMValueRef get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src)
{
   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg);
   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size);
   LLVMValueRef indir_src = NULL;
   if (src.indirect)
      indir_src = get_src(bld_base, *src.indirect);
   return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage);
}

static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src)
{
   if (src.is_ssa)
      return get_ssa_src(bld_base, src.ssa);
   else
      return get_reg_src(bld_base, src.reg);
}

static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr)
{
   bld_base->ssa_defs[idx] = ptr;
}

static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
                            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   assign_ssa(bld_base, ssa->index,
              (ssa->num_components == 1 || is_aos(bld_base)) ? vals[0]
              : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder,
                                                 vals, ssa->num_components));
}

static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
                       unsigned write_mask,
                       LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, reg->reg);
   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, reg->reg->bit_size);
   LLVMValueRef indir_src = NULL;
   if (reg->indirect)
      indir_src = get_src(bld_base, *reg->indirect);
   bld_base->store_reg(bld_base, reg_bld, reg, write_mask ? write_mask : 0xf,
                       indir_src, reg_storage, vals);
}

static void assign_dest(struct lp_build_nir_context *bld_base, const nir_dest *dest,
                        LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (dest->is_ssa)
      assign_ssa_dest(bld_base, &dest->ssa, vals);
   else
      assign_reg(bld_base, &dest->reg, 0, vals);
}

static void assign_alu_dest(struct lp_build_nir_context *bld_base, const nir_alu_dest *dest,
                            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (dest->dest.is_ssa)
      assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
   else
      assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
}

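/* NIR 32-bit booleans are all-ones for true and zero for false, so the
 * comparison produces a per-lane mask which is then resized to the 32-bit
 * integer vector type.
 */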
static LLVMValueRef int_to_bool32(struct lp_build_nir_context *bld_base,
                                  uint32_t src_bit_size,
                                  bool is_unsigned,
                                  LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_compare(bld_base->base.gallivm, int_bld->type,
                                          PIPE_FUNC_NOTEQUAL, val, int_bld->zero);
   if (src_bit_size == 16)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}

static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base,
                                  uint32_t src_bit_size,
                                  LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef result = lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);
   if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   if (src_bit_size == 16)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}

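/* Float comparisons are emitted ordered (NaN operands yield false) except
 * for not-equal, which must be unordered so that NaN != x is true.
 */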
static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef result;

   if (compare != PIPE_FUNC_NOTEQUAL)
      result = lp_build_cmp_ordered(flt_bld, compare, src[0], src[1]);
   else
      result = lp_build_cmp(flt_bld, compare, src[0], src[1]);
   if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 16)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}

static LLVMValueRef icmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           bool is_unsigned,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *i_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_cmp(i_bld, compare, src[0], src[1]);
   if (src_bit_size < 32)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}

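/* Fetch an ALU source and apply its swizzle.  SoA values are LLVM arrays
 * of per-channel vectors, so swizzling is just extract/insert on array
 * elements; a scalar source is broadcast when more components are needed.
 */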
static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base,
                                nir_alu_src src,
                                unsigned num_components)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef value = get_src(bld_base, src.src);
   bool need_swizzle = false;

   assert(value);

   if (is_aos(bld_base))
      return value;

   unsigned src_components = nir_src_num_components(src.src);
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      if (src_components > 1 && num_components == 1) {
         value = LLVMBuildExtractValue(gallivm->builder, value,
                                       src.swizzle[0], "");
      } else if (src_components == 1 && num_components > 1) {
         LLVMValueRef values[] = {value, value, value, value, value, value, value, value,
                                  value, value, value, value, value, value, value, value};
         value = lp_nir_array_build_gather_values(builder, values, num_components);
      } else {
         LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
         for (unsigned i = 0; i < num_components; i++)
            arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, "");
         value = arr;
      }
   }
   assert(!src.negate);
   assert(!src.abs);
   return value;
}

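/* Booleans are all-ones masks, so b2f can AND the mask with the bit
 * pattern of 1.0f: true -> 0x3f800000 (1.0), false -> 0 (0.0).  The result
 * is then converted to the requested float width.
 */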
static LLVMValueRef emit_b2f(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef result = LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
                                      LLVMBuildBitCast(builder,
                                                       lp_build_const_vec(bld_base->base.gallivm,
                                                                          bld_base->base.type, 1.0),
                                                       bld_base->int_bld.vec_type, ""),
                                      "");
   result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
   switch (bitsize) {
   case 16:
      result = LLVMBuildFPTrunc(builder, result, bld_base->half_bld.vec_type, "");
      break;
   case 32:
      break;
   case 64:
      result = LLVMBuildFPExt(builder, result, bld_base->dbl_bld.vec_type, "");
      break;
   default:
      unreachable("unsupported bit size.");
   }
   return result;
}

static LLVMValueRef emit_b2i(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef result = LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
                                      lp_build_const_int_vec(bld_base->base.gallivm,
                                                             bld_base->base.type, 1), "");
   switch (bitsize) {
   case 8:
      return LLVMBuildTrunc(builder, result, bld_base->int8_bld.vec_type, "");
   case 16:
      return LLVMBuildTrunc(builder, result, bld_base->int16_bld.vec_type, "");
   case 32:
      return result;
   case 64:
      return LLVMBuildZExt(builder, result, bld_base->int64_bld.vec_type, "");
   default:
      unreachable("unsupported bit size.");
   }
}

static LLVMValueRef emit_b32csel(struct lp_build_nir_context *bld_base,
                                 unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
                                 LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef sel = cast_type(bld_base, src[0], nir_type_int, 32);
   LLVMValueRef v = lp_build_compare(bld_base->base.gallivm, bld_base->int_bld.type,
                                     PIPE_FUNC_NOTEQUAL, sel, bld_base->int_bld.zero);
   struct lp_build_context *bld = get_int_bld(bld_base, false, src_bit_size[1]);
   return lp_build_select(bld, v, src[1], src[2]);
}

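/* Extract the low or high 32-bit halves of a 64-bit vector by bitcasting
 * to a twice-as-long i32 vector and shuffling out the even (low) or odd
 * (high) elements, with the order swapped on big-endian hosts.  The
 * merge_* helpers below perform the inverse interleave.
 */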
static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src,
                          LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}

static LLVMValueRef
merge_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}

static LLVMValueRef split_16bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src,
                          LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}

static LLVMValueRef
merge_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->int16_bld.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->int_bld.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}

static LLVMValueRef get_signed_divisor(struct gallivm_state *gallivm,
                                       struct lp_build_context *int_bld,
                                       struct lp_build_context *mask_bld,
                                       int src_bit_size,
                                       LLVMValueRef src, LLVMValueRef divisor)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* For signed divides SIGFPE can occur if the numerator is INT_MIN
    * and the divisor is -1. */
   /* set mask if numerator == INT_MIN */
   long long min_val;
   switch (src_bit_size) {
   case 8:
      min_val = INT8_MIN;
      break;
   case 16:
      min_val = INT16_MIN;
      break;
   default:
   case 32:
      min_val = INT_MIN;
      break;
   case 64:
      min_val = INT64_MIN;
      break;
   }
   LLVMValueRef div_mask2 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src,
                                         lp_build_const_int_vec(gallivm, int_bld->type, min_val));
   /* set another mask if divisor is -1 */
   LLVMValueRef div_mask3 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor,
                                         lp_build_const_int_vec(gallivm, int_bld->type, -1));
   div_mask2 = LLVMBuildAnd(builder, div_mask2, div_mask3, "");

   divisor = lp_build_select(mask_bld, div_mask2, int_bld->one, divisor);
   return divisor;
}

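/* Integer division with defined divide-by-zero behaviour: the zero divisor
 * is patched before dividing so no fault can occur, and lanes that divided
 * by zero are then forced to all-ones for unsigned (matching D3D10 udiv)
 * and to zero for signed.
 */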
static LLVMValueRef
do_int_divide(struct lp_build_nir_context *bld_base,
              bool is_unsigned, unsigned src_bit_size,
              LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);

   /* Avoid dividing by 0: convert a 0 divisor to -1 (all ones). */
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);

   LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, "");
   if (!is_unsigned) {
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_div(int_bld, src, divisor);

   if (!is_unsigned) {
      LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
      return LLVMBuildAnd(builder, not_div_mask, result, "");
   } else
      /* udiv by zero is guaranteed to return 0xffffffff, at least with d3d10;
       * may as well do the same for idiv */
      return LLVMBuildOr(builder, div_mask, result, "");
}

static LLVMValueRef
do_int_mod(struct lp_build_nir_context *bld_base,
           bool is_unsigned, unsigned src_bit_size,
           LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);
   LLVMValueRef divisor = LLVMBuildOr(builder,
                                      div_mask,
                                      src2, "");
   if (!is_unsigned) {
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
   return LLVMBuildOr(builder, div_mask, result, "");
}

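/* Quantize an f32 to f16 precision via a round trip through half floats.
 * Non-zero values whose magnitude ends up below the smallest normal half
 * (0x38800000, i.e. 2^-14, as an f32 bit pattern) are flushed to zero.
 */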
static LLVMValueRef
do_quantize_to_f16(struct lp_build_nir_context *bld_base,
                   LLVMValueRef src)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result, cond, cond2, temp;

   result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, "");
   result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");

   temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
   cond = LLVMBuildFCmp(builder, LLVMRealOGT,
                        LLVMBuildBitCast(builder,
                                         lp_build_const_int_vec(gallivm, bld_base->uint_bld.type,
                                                                0x38800000),
                                         bld_base->base.vec_type, ""),
                        temp, "");
   cond2 = LLVMBuildFCmp(builder, LLVMRealONE, temp, bld_base->base.zero, "");
   cond = LLVMBuildAnd(builder, cond, cond2, "");
   result = LLVMBuildSelect(builder, cond, bld_base->base.zero, result, "");
   return result;
}

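/* Emit the LLVM IR for a single (per-channel) NIR ALU op.  The sources
 * have already been swizzled and cast to the input types the op expects.
 */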
static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
                                  const nir_alu_instr *instr,
                                  unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
                                  LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result;

   switch (instr->op) {
   case nir_op_b2f16:
      result = emit_b2f(bld_base, src[0], 16);
      break;
   case nir_op_b2f32:
      result = emit_b2f(bld_base, src[0], 32);
      break;
   case nir_op_b2f64:
      result = emit_b2f(bld_base, src[0], 64);
      break;
   case nir_op_b2i8:
      result = emit_b2i(bld_base, src[0], 8);
      break;
   case nir_op_b2i16:
      result = emit_b2i(bld_base, src[0], 16);
      break;
   case nir_op_b2i32:
      result = emit_b2i(bld_base, src[0], 32);
      break;
   case nir_op_b2i64:
      result = emit_b2i(bld_base, src[0], 64);
      break;
   case nir_op_b32csel:
      result = emit_b32csel(bld_base, src_bit_size, src);
      break;
   case nir_op_bit_count:
      result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, "");
      else if (src_bit_size[0] > 32)
         result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
      break;
   case nir_op_bitfield_select:
      result = lp_build_xor(&bld_base->uint_bld, src[2],
                            lp_build_and(&bld_base->uint_bld, src[0],
                                         lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
      break;
   case nir_op_bitfield_reverse:
      result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_f2b32:
      result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
      break;
   case nir_op_f2f16:
      if (src_bit_size[0] == 64)
         src[0] = LLVMBuildFPTrunc(builder, src[0],
                                   bld_base->base.vec_type, "");
      result = LLVMBuildFPTrunc(builder, src[0],
                                bld_base->half_bld.vec_type, "");
      break;
   case nir_op_f2f32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildFPExt(builder, src[0],
                                 bld_base->base.vec_type, "");
      else
         result = LLVMBuildFPTrunc(builder, src[0],
                                   bld_base->base.vec_type, "");
      break;
   case nir_op_f2f64:
      result = LLVMBuildFPExt(builder, src[0],
                              bld_base->dbl_bld.vec_type, "");
      break;
   case nir_op_f2i8:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_f2i16:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_f2i32:
      result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, "");
      break;
   case nir_op_f2u8:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_f2u16:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_f2u32:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->base.int_vec_type, "");
      break;
   case nir_op_f2i64:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->int64_bld.vec_type, "");
      break;
   case nir_op_f2u64:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint64_bld.vec_type, "");
      break;
   case nir_op_fabs:
      result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fadd:
      result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_fceil:
      result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fcos:
      result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fdiv:
      result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_feq32:
      result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
      break;
   case nir_op_fexp2:
      result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_ffloor:
      result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_ffma:
      result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
      break;
   case nir_op_ffract: {
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
      LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]);
      result = lp_build_sub(flt_bld, src[0], tmp);
      break;
   }
   case nir_op_fge:
   case nir_op_fge32:
      result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src);
      break;
   case nir_op_find_lsb: {
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      result = lp_build_cttz(int_bld, src[0]);
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
      else if (src_bit_size[0] > 32)
         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
      break;
   }
   case nir_op_fisfinite32:
      unreachable("Should have been lowered in nir_opt_algebraic_late.");
   case nir_op_flog2:
      result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_flt:
   case nir_op_flt32:
      result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
      break;
   case nir_op_fmax:
   case nir_op_fmin: {
      enum gallivm_nan_behavior minmax_nan;
      int first = 0;

      /* If one of the sources is known to be a number (i.e., not NaN), then
       * better code can be generated by passing that information along.
       */
      if (is_a_number(bld_base->range_ht, instr, 1,
                      0 /* unused num_components */,
                      NULL /* unused swizzle */)) {
         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
      } else if (is_a_number(bld_base->range_ht, instr, 0,
                             0 /* unused num_components */,
                             NULL /* unused swizzle */)) {
         first = 1;
         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
      } else {
         minmax_nan = GALLIVM_NAN_RETURN_OTHER;
      }

      if (instr->op == nir_op_fmin) {
         result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
                                   src[first], src[1 - first], minmax_nan);
      } else {
         result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
                                   src[first], src[1 - first], minmax_nan);
      }
      break;
   }
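   /* fmod is lowered as x - y * floor(x / y), which matches GLSL mod()
    * (the result takes the sign of y) rather than C fmod.
    */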
   case nir_op_fmod: {
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
      result = lp_build_div(flt_bld, src[0], src[1]);
      result = lp_build_floor(flt_bld, result);
      result = lp_build_mul(flt_bld, src[1], result);
      result = lp_build_sub(flt_bld, src[0], result);
      break;
   }
   case nir_op_fmul:
      result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_fneu32:
      result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
      break;
   case nir_op_fneg:
      result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fpow:
      result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_fquantize2f16:
      result = do_quantize_to_f16(bld_base, src[0]);
      break;
   case nir_op_frcp:
      result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fround_even:
      if (src_bit_size[0] == 16) {
         struct lp_build_context *bld = get_flt_bld(bld_base, 16);
         char intrinsic[64];
         lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
         result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
      } else
         result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_frsq:
      result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsat:
      result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsign:
      result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsin:
      result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsqrt:
      result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_ftrunc:
      result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_i2b32:
      result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
      break;
   case nir_op_i2f16:
      result = LLVMBuildSIToFP(builder, src[0],
                               bld_base->half_bld.vec_type, "");
      break;
   case nir_op_i2f32:
      result = lp_build_int_to_float(&bld_base->base, src[0]);
      break;
   case nir_op_i2f64:
      result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
      break;
   case nir_op_i2i8:
      result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
      break;
   case nir_op_i2i16:
      if (src_bit_size[0] < 16)
         result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
      break;
   case nir_op_i2i32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
      break;
   case nir_op_i2i64:
      result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
      break;
   case nir_op_iabs:
      result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_iadd:
      result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_iand:
      result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_idiv:
      result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_ieq32:
      result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ige32:
      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ilt32:
      result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
      break;
   case nir_op_imax:
      result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_imin:
      result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_imul:
   case nir_op_imul24:
      result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_imul_high: {
      LLVMValueRef hi_bits;
      lp_build_mul_32_lohi(get_int_bld(bld_base, false, src_bit_size[0]),
                           src[0], src[1], &hi_bits);
      result = hi_bits;
      break;
   }
   case nir_op_ine32:
      result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ineg:
      result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_inot:
      result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_ior:
      result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
                           src[0], src[1]);
      break;
   case nir_op_imod:
   case nir_op_irem:
      result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
      break;
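   /* Shift counts are resized to match the shifted value and masked to
    * bit_size - 1, since out-of-range shift amounts are undefined in LLVM.
    */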
   case nir_op_ishl: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1],
                            lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shl(int_bld, src[0], src[1]);
      break;
   }
   case nir_op_ishr: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1],
                            lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shr(int_bld, src[0], src[1]);
      break;
   }
   case nir_op_isign:
      result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_isub:
      result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_ixor:
      result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_mov:
      result = src[0];
      break;
   case nir_op_unpack_64_2x32_split_x:
      result = split_64bit(bld_base, src[0], false);
      break;
   case nir_op_unpack_64_2x32_split_y:
      result = split_64bit(bld_base, src[0], true);
      break;

   case nir_op_pack_32_2x16_split: {
      LLVMValueRef tmp = merge_16bit(bld_base, src[0], src[1]);
      result = LLVMBuildBitCast(builder, tmp, bld_base->base.vec_type, "");
      break;
   }
   case nir_op_unpack_32_2x16_split_x:
      result = split_16bit(bld_base, src[0], false);
      break;
   case nir_op_unpack_32_2x16_split_y:
      result = split_16bit(bld_base, src[0], true);
      break;
   case nir_op_pack_64_2x32_split: {
      LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]);
      result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, "");
      break;
   }
   case nir_op_pack_32_4x8_split: {
      LLVMValueRef tmp1 = merge_16bit(bld_base, src[0], src[1]);
      LLVMValueRef tmp2 = merge_16bit(bld_base, src[2], src[3]);
      tmp1 = LLVMBuildBitCast(builder, tmp1, bld_base->uint16_bld.vec_type, "");
      tmp2 = LLVMBuildBitCast(builder, tmp2, bld_base->uint16_bld.vec_type, "");
      LLVMValueRef tmp = merge_16bit(bld_base, tmp1, tmp2);
      result = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.vec_type, "");
      break;
   }
   case nir_op_u2f16:
      result = LLVMBuildUIToFP(builder, src[0],
                               bld_base->half_bld.vec_type, "");
      break;
   case nir_op_u2f32:
      result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
      break;
   case nir_op_u2f64:
      result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
      break;
   case nir_op_u2u8:
      result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_u2u16:
      if (src_bit_size[0] < 16)
         result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_u2u32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
      break;
   case nir_op_u2u64:
      result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
      break;
   case nir_op_udiv:
      result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_ufind_msb: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      result = lp_build_ctlz(uint_bld, src[0]);
      result = lp_build_sub(uint_bld,
                            lp_build_const_int_vec(gallivm, uint_bld->type, src_bit_size[0] - 1),
                            result);
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
      break;
   }
   case nir_op_uge32:
      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src);
      break;
   case nir_op_ult32:
      result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
      break;
   case nir_op_umax:
      result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_umin:
      result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_umod:
      result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_umul_high: {
      LLVMValueRef hi_bits;
      lp_build_mul_32_lohi(get_int_bld(bld_base, true, src_bit_size[0]),
                           src[0], src[1], &hi_bits);
      result = hi_bits;
      break;
   }
   case nir_op_ushr: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1],
                            lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shr(uint_bld, src[0], src[1]);
      break;
   }
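   /* bcsel may have pointer operands; if only one side is a pointer the
    * other is converted to match, and non-pointer operands are selected
    * as integer bit patterns.
    */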
   case nir_op_bcsel: {
      LLVMTypeRef src1_type = LLVMTypeOf(src[1]);
      LLVMTypeRef src2_type = LLVMTypeOf(src[2]);

      if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
          LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
         src[2] = LLVMBuildIntToPtr(builder, src[2], src1_type, "");
      } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
                 LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
         src[1] = LLVMBuildIntToPtr(builder, src[1], src2_type, "");
      }

      for (int i = 1; i <= 2; i++) {
         LLVMTypeRef type = LLVMTypeOf(src[i]);
         if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
            break;
         src[i] = LLVMBuildBitCast(builder, src[i],
                                   get_int_bld(bld_base, true, src_bit_size[i])->vec_type, "");
      }
      return LLVMBuildSelect(builder, src[0], src[1], src[2], "");
   }
   default:
      assert(0);
      break;
   }
   return result;
}

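/* Visit a NIR ALU instruction: gather and swizzle the sources, run the op
 * once per destination channel (or once in total for AoS and horizontal
 * ops like fsum), and write back the results.
 */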
static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
   unsigned num_components = nir_dest_num_components(instr->dest.dest);
   unsigned src_components;
   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord_amd:
   case nir_op_cube_face_index_amd:
      src_components = 3;
      break;
   case nir_op_fsum2:
   case nir_op_fsum3:
   case nir_op_fsum4:
      src_components = nir_op_infos[instr->op].input_sizes[0];
      break;
   default:
      src_components = num_components;
      break;
   }
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      src[i] = get_alu_src(bld_base, instr->src[i], src_components);
      src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
   }

   if (instr->op == nir_op_mov && is_aos(bld_base) && !instr->dest.dest.is_ssa) {
      for (unsigned i = 0; i < 4; i++) {
         if (instr->dest.write_mask & (1 << i)) {
            assign_reg(bld_base, &instr->dest.dest.reg, (1 << i), src);
         }
      }
      return;
   }

   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 ||
       instr->op == nir_op_vec2 || instr->op == nir_op_vec8 ||
       instr->op == nir_op_vec16) {
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         result[i] = cast_type(bld_base, src[i],
                               nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
      }
   } else if (instr->op == nir_op_fsum4 || instr->op == nir_op_fsum3 ||
              instr->op == nir_op_fsum2) {
      for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) {
         LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder,
                                                        src[0], c, "");
         temp_chan = cast_type(bld_base, temp_chan,
                               nir_op_infos[instr->op].input_types[0], src_bit_size[0]);
         result[0] = (c == 0) ? temp_chan
            : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]), result[0], temp_chan);
      }
   } else if (is_aos(bld_base)) {
      if (instr->op == nir_op_fmul) {
         if (LLVMIsConstant(src[0]))
            src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1);
         if (LLVMIsConstant(src[1]))
            src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1);
      }
      result[0] = do_alu_action(bld_base, instr, src_bit_size, src);
   } else {
      for (unsigned c = 0; c < num_components; c++) {
         LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];

         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
            if (num_components > 1) {
               src_chan[i] = LLVMBuildExtractValue(gallivm->builder,
                                                   src[i], c, "");
            } else
               src_chan[i] = src[i];
            src_chan[i] = cast_type(bld_base, src_chan[i],
                                    nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
         }
         result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan);
         result[c] = cast_type(bld_base, result[c],
                               nir_op_infos[instr->op].output_type,
                               nir_dest_bit_size(instr->dest.dest));
      }
   }
   assign_alu_dest(bld_base, &instr->dest, result);
}

static void visit_load_const(struct lp_build_nir_context *bld_base,
                             const nir_load_const_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   bld_base->load_const(bld_base, instr, result);
   assign_ssa_dest(bld_base, &instr->def, result);
}

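/* Walk a deref chain and split the addressing into a constant part and an
 * optional indirect LLVM value, both counted in attribute slots.  For
 * per-vertex IO the leading array index is returned separately as the
 * vertex index.
 */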
static void
get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
                 bool vs_in, unsigned *vertex_index_out,
                 LLVMValueRef *vertex_index_ref,
                 unsigned *const_out, LLVMValueRef *indir_out)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   nir_variable *var = nir_deref_instr_get_variable(instr);
   nir_deref_path path;
   unsigned idx_lvl = 1;

   nir_deref_path_init(&path, instr, NULL);

   if (vertex_index_out != NULL || vertex_index_ref != NULL) {
      if (vertex_index_ref) {
         *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index);
         if (vertex_index_out)
            *vertex_index_out = 0;
      } else {
         *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
      }
      ++idx_lvl;
   }

   uint32_t const_offset = 0;
   LLVMValueRef offset = NULL;

   if (var->data.compact && nir_src_is_const(instr->arr.index)) {
      assert(instr->deref_type == nir_deref_type_array);
      const_offset = nir_src_as_uint(instr->arr.index);
      goto out;
   }

   for (; path.path[idx_lvl]; ++idx_lvl) {
      const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
      if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
         unsigned index = path.path[idx_lvl]->strct.index;

         for (unsigned i = 0; i < index; i++) {
            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
            const_offset += glsl_count_attribute_slots(ft, vs_in);
         }
      } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
            const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
         } else {
            LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
            idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
            LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld,
                                                  lp_build_const_int_vec(bld_base->base.gallivm,
                                                                         bld_base->base.type, size),
                                                  idx_src);
            if (offset)
               offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
            else
               offset = array_off;
         }
      } else
         unreachable("Unhandled deref type in get_deref_offset");
   }

out:
   nir_deref_path_finish(&path);

   if (const_offset && offset)
      offset = LLVMBuildAdd(builder, offset,
                            lp_build_const_int_vec(bld_base->base.gallivm,
                                                   bld_base->uint_bld.type, const_offset),
                            "");
   *const_out = const_offset;
   *indir_out = offset;
}

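/* Lowered IO intrinsics carry their location/base/component directly, so a
 * temporary nir_variable is filled in from the intrinsic and handed to the
 * same load_var/store_var hooks the deref path uses.
 */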
static void
visit_load_input(struct lp_build_nir_context *bld_base,
                 nir_intrinsic_instr *instr,
                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_variable var = {0};
   var.data.location = nir_intrinsic_io_semantics(instr).location;
   var.data.driver_location = nir_intrinsic_base(instr);
   var.data.location_frac = nir_intrinsic_component(instr);

   unsigned nc = nir_dest_num_components(instr->dest);
   unsigned bit_size = nir_dest_bit_size(instr->dest);

   nir_src offset = *nir_get_io_offset_src(instr);
   bool indirect = !nir_src_is_const(offset);
   if (!indirect)
      assert(nir_src_as_uint(offset) == 0);
   LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;

   bld_base->load_var(bld_base, nir_var_shader_in, nc, bit_size, &var, 0, NULL, 0,
                      indir_index, result);
}

static void
visit_store_output(struct lp_build_nir_context *bld_base,
                   nir_intrinsic_instr *instr)
{
   nir_variable var = {0};
   var.data.location = nir_intrinsic_io_semantics(instr).location;
   var.data.driver_location = nir_intrinsic_base(instr);
   var.data.location_frac = nir_intrinsic_component(instr);

   unsigned mask = nir_intrinsic_write_mask(instr);

   unsigned bit_size = nir_src_bit_size(instr->src[0]);
   LLVMValueRef src = get_src(bld_base, instr->src[0]);

   nir_src offset = *nir_get_io_offset_src(instr);
   bool indirect = !nir_src_is_const(offset);
   if (!indirect)
      assert(nir_src_as_uint(offset) == 0);
   LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;

   if (mask == 0x1 && LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMArrayTypeKind) {
      src = LLVMBuildExtractValue(bld_base->base.gallivm->builder,
                                  src, 0, "");
   }

   bld_base->store_var(bld_base, nir_var_shader_out, util_last_bit(mask),
                       bit_size, &var, mask, NULL, 0, indir_index, src);
}

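/* Variable loads need stage-specific handling: GS inputs carry a constant
 * vertex index, while TCS/TES per-vertex IO takes an indirect vertex index
 * from the deref chain.
 */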
static void visit_load_var(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   assert(util_bitcount(deref->modes) == 1);
   nir_variable_mode mode = deref->modes;
   unsigned const_index;
   LLVMValueRef indir_index;
   LLVMValueRef indir_vertex_index = NULL;
   unsigned vertex_index = 0;
   unsigned nc = nir_dest_num_components(instr->dest);
   unsigned bit_size = nir_dest_bit_size(instr->dest);
   if (var) {
      bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
         var->data.mode == nir_var_shader_in;
      bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
         var->data.mode == nir_var_shader_in;
      bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_in;
      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_out && !var->data.patch;
      bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
         var->data.mode == nir_var_shader_in && !var->data.patch;

      mode = var->data.mode;

      get_deref_offset(bld_base, deref, vs_in,
                       gs_in ? &vertex_index : NULL,
                       (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
                       &const_index, &indir_index);
   }
   bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index,
                      indir_vertex_index, const_index, indir_index, result);
}

static void
visit_store_var(struct lp_build_nir_context *bld_base,
                nir_intrinsic_instr *instr)
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   assert(util_bitcount(deref->modes) == 1);
   nir_variable_mode mode = deref->modes;
   int writemask = instr->const_index[0];
   unsigned bit_size = nir_src_bit_size(instr->src[1]);
   LLVMValueRef src = get_src(bld_base, instr->src[1]);
   unsigned const_index = 0;
   LLVMValueRef indir_index, indir_vertex_index = NULL;
   if (var) {
      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_out && !var->data.patch;
      get_deref_offset(bld_base, deref, false, NULL,
                       tcs_out ? &indir_vertex_index : NULL,
                       &const_index, &indir_index);
   }
   bld_base->store_var(bld_base, mode, instr->num_components, bit_size,
                       var, writemask, indir_vertex_index, const_index,
                       indir_index, src);
}

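/* UBO loads reduce the buffer-index vector to its first element
 * (effectively assuming a lane-uniform index) and pass along whether the
 * offset is uniform so the backend can emit a scalar load.
 */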
visit_load_ubo(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1365 static void visit_load_ubo(struct lp_build_nir_context *bld_base,
1366 nir_intrinsic_instr *instr,
1367 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1368 {
1369 struct gallivm_state *gallivm = bld_base->base.gallivm;
1370 LLVMBuilderRef builder = gallivm->builder;
1371 LLVMValueRef idx = get_src(bld_base, instr->src[0]);
1372 LLVMValueRef offset = get_src(bld_base, instr->src[1]);
1373
1374 bool offset_is_uniform = nir_src_is_always_uniform(instr->src[1]);
1375 idx = LLVMBuildExtractElement(builder, idx, lp_build_const_int32(gallivm, 0), "");
1376 bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1377 offset_is_uniform, idx, offset, result);
1378 }
1379
visit_load_push_constant(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[4])1380 static void visit_load_push_constant(struct lp_build_nir_context *bld_base,
1381 nir_intrinsic_instr *instr,
1382 LLVMValueRef result[4])
1383 {
1384 struct gallivm_state *gallivm = bld_base->base.gallivm;
1385 LLVMValueRef offset = get_src(bld_base, instr->src[0]);
1386 LLVMValueRef idx = lp_build_const_int32(gallivm, 0);
1387 bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
1388
1389 bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
1390 offset_is_uniform, idx, offset, result);
1391 }
1392
1393
static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr,
                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                      idx, offset, result);
}

static void visit_store_ssbo(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_uint, 32);
   LLVMValueRef offset = get_src(bld_base, instr->src[2]);
   int writemask = instr->const_index[0];
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   bld_base->store_mem(bld_base, writemask, nc, bitsize, idx, offset, val);
}

static void visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
   result[0] = bld_base->get_ssbo_size(bld_base, idx);
}

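/* SSBO atomics: comp_swap passes its compare value as a second data operand
 * (src[3]); every other op forwards a single value to atomic_mem.
 */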
static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   LLVMValueRef val = get_src(bld_base, instr->src[2]);
   LLVMValueRef val2 = NULL;
   int bitsize = nir_src_bit_size(instr->src[2]);
   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
      val2 = get_src(bld_base, instr->src[3]);

   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, idx, offset, val, val2, &result[0]);
}

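/* Image load: build lp_img_params from the image deref (pipe target, up to
 * four coords, optional MS sample index) and dispatch the backend image op.
 */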
static void visit_load_image(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];

   params.coords = coords;
   params.outdata = result;
   params.img_op = LP_IMG_LOAD;
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS)
      params.ms_index = cast_type(bld_base, get_src(bld_base, instr->src[2]), nir_type_uint, 32);
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;
   bld_base->image_op(bld_base, &params);
}

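/* Image store: same parameter setup as the load path, with the four data
 * components bitcast to the float vector type the backend expects.
 */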
static void visit_store_image(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;

   for (unsigned i = 0; i < 4; i++) {
      params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
      params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, "");
   }
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   params.img_op = LP_IMG_STORE;
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}

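/* Image atomics: translate the NIR image atomic intrinsic to an LLVM
 * atomicrmw opcode; comp_swap instead becomes a separate CAS image op.
 */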
static void visit_atomic_image(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_img_params params;
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));

   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_atomic_add:
      params.op = LLVMAtomicRMWBinOpAdd;
      break;
   case nir_intrinsic_image_deref_atomic_exchange:
      params.op = LLVMAtomicRMWBinOpXchg;
      break;
   case nir_intrinsic_image_deref_atomic_and:
      params.op = LLVMAtomicRMWBinOpAnd;
      break;
   case nir_intrinsic_image_deref_atomic_or:
      params.op = LLVMAtomicRMWBinOpOr;
      break;
   case nir_intrinsic_image_deref_atomic_xor:
      params.op = LLVMAtomicRMWBinOpXor;
      break;
   case nir_intrinsic_image_deref_atomic_umin:
      params.op = LLVMAtomicRMWBinOpUMin;
      break;
   case nir_intrinsic_image_deref_atomic_umax:
      params.op = LLVMAtomicRMWBinOpUMax;
      break;
   case nir_intrinsic_image_deref_atomic_imin:
      params.op = LLVMAtomicRMWBinOpMin;
      break;
   case nir_intrinsic_image_deref_atomic_imax:
      params.op = LLVMAtomicRMWBinOpMax;
      break;
   default:
      break;
   }

   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
      LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
      params.indata[0] = in_val;
      params.indata2[0] = cas_val;
   } else
      params.indata[0] = in_val;

   params.outdata = result;
   params.img_op = (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}

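/* Image size/samples queries share the sampler size-query parameters; the
 * samples variant only sets samples_only in addition.
 */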
static void visit_image_size(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_sampler_size_query_params params = { 0 };
   unsigned const_index;
   LLVMValueRef indir_index;
   const struct glsl_type *type = glsl_without_array(var->type);
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);
   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
   params.texture_unit_offset = indir_index;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   params.sizes_out = result;

   bld_base->image_size(bld_base, &params);
}

static void visit_image_samples(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_sampler_size_query_params params = { 0 };
   unsigned const_index;
   LLVMValueRef indir_index;
   const struct glsl_type *type = glsl_without_array(var->type);
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
   params.texture_unit_offset = indir_index;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   params.sizes_out = result;
   params.samples_only = true;

   bld_base->image_size(bld_base, &params);
}

static void visit_shared_load(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                      NULL, offset, result);
}

static void visit_shared_store(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   int writemask = instr->const_index[1];
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   bld_base->store_mem(bld_base, writemask, nc, bitsize, NULL, offset, val);
}

static void visit_shared_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   LLVMValueRef val = get_src(bld_base, instr->src[1]);
   LLVMValueRef val2 = NULL;
   int bitsize = nir_src_bit_size(instr->src[1]);
   if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
      val2 = get_src(bld_base, instr->src[2]);

   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, NULL, offset, val, val2, &result[0]);
}

static void visit_barrier(struct lp_build_nir_context *bld_base)
{
   bld_base->barrier(bld_base);
}

static void visit_discard(struct lp_build_nir_context *bld_base,
                          nir_intrinsic_instr *instr)
{
   LLVMValueRef cond = NULL;
   if (instr->intrinsic == nir_intrinsic_discard_if) {
      cond = get_src(bld_base, instr->src[0]);
      cond = cast_type(bld_base, cond, nir_type_int, 32);
   }
   bld_base->discard(bld_base, cond);
}

static void visit_load_kernel_input(struct lp_build_nir_context *bld_base,
                                    nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);

   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
   bld_base->load_kernel_arg(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                             nir_src_bit_size(instr->src[0]),
                             offset_is_uniform, offset, result);
}

static void visit_load_global(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
   bld_base->load_global(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                         nir_src_bit_size(instr->src[0]),
                         addr, result);
}

static void visit_store_global(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   LLVMValueRef addr = get_src(bld_base, instr->src[1]);
   int addr_bitsize = nir_src_bit_size(instr->src[1]);
   int writemask = instr->const_index[0];
   bld_base->store_global(bld_base, writemask, nc, bitsize, addr_bitsize, addr, val);
}

static void visit_global_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
   LLVMValueRef val = get_src(bld_base, instr->src[1]);
   LLVMValueRef val2 = NULL;
   int addr_bitsize = nir_src_bit_size(instr->src[0]);
   int val_bitsize = nir_src_bit_size(instr->src[1]);
   if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
      val2 = get_src(bld_base, instr->src[2]);

   bld_base->atomic_global(bld_base, instr->intrinsic, addr_bitsize,
                           val_bitsize, addr, val, val2, &result[0]);
}

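/* interp_deref_at_*: interpolate a fragment input at the centroid, at an
 * explicit pixel offset, or at a given sample index.
 */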
static void visit_interp(struct lp_build_nir_context *bld_base,
                         nir_intrinsic_instr *instr,
                         LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   unsigned num_components = nir_dest_num_components(instr->dest);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   unsigned const_index;
   LLVMValueRef indir_index;
   LLVMValueRef offsets[2] = { NULL, NULL };
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);
   bool centroid = instr->intrinsic == nir_intrinsic_interp_deref_at_centroid;
   bool sample = false;
   if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
      for (unsigned i = 0; i < 2; i++) {
         offsets[i] = LLVMBuildExtractValue(builder, get_src(bld_base, instr->src[1]), i, "");
         offsets[i] = cast_type(bld_base, offsets[i], nir_type_float, 32);
      }
   } else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
      offsets[0] = get_src(bld_base, instr->src[1]);
      offsets[0] = cast_type(bld_base, offsets[0], nir_type_int, 32);
      sample = true;
   }
   bld_base->interp_at(bld_base, num_components, var, centroid, sample, const_index, indir_index, offsets, result);
}

static void visit_load_scratch(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);

   bld_base->load_scratch(bld_base, nir_dest_num_components(instr->dest),
                          nir_dest_bit_size(instr->dest), offset, result);
}

static void visit_store_scratch(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   int writemask = instr->const_index[2];
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   bld_base->store_scratch(bld_base, writemask, nc, bitsize, offset, val);
}

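/* Central intrinsic dispatch: route each NIR intrinsic to its visit_*
 * helper or backend callback, then store any produced result.
 */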
static void visit_intrinsic(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      visit_load_input(bld_base, instr, result);
      break;
   case nir_intrinsic_store_output:
      visit_store_output(bld_base, instr);
      break;
   case nir_intrinsic_load_deref:
      visit_load_var(bld_base, instr, result);
      break;
   case nir_intrinsic_store_deref:
      visit_store_var(bld_base, instr);
      break;
   case nir_intrinsic_load_ubo:
      visit_load_ubo(bld_base, instr, result);
      break;
   case nir_intrinsic_load_push_constant:
      visit_load_push_constant(bld_base, instr, result);
      break;
   case nir_intrinsic_load_ssbo:
      visit_load_ssbo(bld_base, instr, result);
      break;
   case nir_intrinsic_store_ssbo:
      visit_store_ssbo(bld_base, instr);
      break;
   case nir_intrinsic_get_ssbo_size:
      visit_get_ssbo_size(bld_base, instr, result);
      break;
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_view_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
      bld_base->sysval_intrin(bld_base, instr, result);
      break;
   case nir_intrinsic_load_helper_invocation:
      bld_base->helper_invocation(bld_base, &result[0]);
      break;
   case nir_intrinsic_discard_if:
   case nir_intrinsic_discard:
      visit_discard(bld_base, instr);
      break;
   case nir_intrinsic_emit_vertex:
      bld_base->emit_vertex(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_end_primitive:
      bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      visit_ssbo_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_load:
      visit_load_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_store:
      visit_store_image(bld_base, instr);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
      visit_atomic_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_size:
      visit_image_size(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_samples:
      visit_image_samples(bld_base, instr, result);
      break;
   case nir_intrinsic_load_shared:
      visit_shared_load(bld_base, instr, result);
      break;
   case nir_intrinsic_store_shared:
      visit_shared_store(bld_base, instr);
      break;
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      visit_shared_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_scoped_barrier:
      visit_barrier(bld_base);
      break;
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_tcs_patch:
      break;
   case nir_intrinsic_load_kernel_input:
      visit_load_kernel_input(bld_base, instr, result);
      break;
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
      visit_load_global(bld_base, instr, result);
      break;
   case nir_intrinsic_store_global:
      visit_store_global(bld_base, instr);
      break;
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
      visit_global_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_vote_feq:
      bld_base->vote(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_elect:
      bld_base->elect(bld_base, result);
      break;
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
      bld_base->reduce(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_ballot:
      bld_base->ballot(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, 32), instr, result);
      break;
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation: {
      LLVMValueRef src1 = NULL;

      if (instr->intrinsic == nir_intrinsic_read_invocation)
         src1 = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_int, 32);
      bld_base->read_invocation(bld_base, get_src(bld_base, instr->src[0]), nir_src_bit_size(instr->src[0]), src1, result);
      break;
   }
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
      visit_interp(bld_base, instr, result);
      break;
   case nir_intrinsic_load_scratch:
      visit_load_scratch(bld_base, instr, result);
      break;
   case nir_intrinsic_store_scratch:
      visit_store_scratch(bld_base, instr);
      break;
   default:
      fprintf(stderr, "Unsupported intrinsic: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      assert(0);
      break;
   }
   if (result[0]) {
      assign_dest(bld_base, &instr->dest, result);
   }
}

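/* Size/level/sample-count queries (txs, query_levels, texture_samples);
 * query_levels takes its result from the fourth output channel.
 */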
static void visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct lp_sampler_size_query_params params = { 0 };
   LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_lod:
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;
      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      default:
         break;
      }
   }

   params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array);
   params.texture_unit = instr->texture_index;
   params.explicit_lod = explicit_lod;
   params.is_sviewinfo = TRUE;
   params.sizes_out = sizes_out;
   params.samples_only = (instr->op == nir_texop_texture_samples);
   params.texture_unit_offset = texture_unit_offset;

   if (instr->op == nir_texop_query_levels)
      params.explicit_lod = bld_base->uint_bld.zero;
   bld_base->tex_size(bld_base, &params);
   assign_dest(bld_base, &instr->dest, &sizes_out[instr->op == nir_texop_query_levels ? 3 : 0]);
}

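/* Pick how the LOD varies across the vector: scalar for uniform sources,
 * otherwise per-quad in fragment shaders (unless disabled) or per-element.
 */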
static enum lp_sampler_lod_property lp_build_nir_lod_property(struct lp_build_nir_context *bld_base,
                                                              nir_src lod_src)
{
   enum lp_sampler_lod_property lod_property;

   if (nir_src_is_always_uniform(lod_src))
      lod_property = LP_SAMPLER_LOD_SCALAR;
   else if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      else
         lod_property = LP_SAMPLER_LOD_PER_QUAD;
   }
   else
      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   return lod_property;
}

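/* Main texture translation: collect coords, derivatives, LOD, offsets and
 * unit indices into lp_sampler_params, invoke the backend tex hook, and
 * narrow the returned texel to 16 bits when the destination requires it.
 */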
static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef explicit_lod = NULL, ms_index = NULL;
   struct lp_sampler_params params;
   struct lp_derivatives derivs;
   unsigned sample_key = 0;
   nir_deref_instr *texture_deref_instr = NULL;
   nir_deref_instr *sampler_deref_instr = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
   unsigned lod_src = 0;
   LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);
   unsigned coord_vals = is_aos(bld_base) ? 1 : instr->coord_components;
   memset(&params, 0, sizeof(params));
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels || instr->op == nir_texop_texture_samples) {
      visit_txs(bld_base, instr);
      return;
   }
   if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
   else if (instr->op == nir_texop_tg4) {
      sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
      sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
   } else if (instr->op == nir_texop_lod)
      sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
         if (coord_vals == 1)
            coords[0] = coord;
         else {
            for (unsigned chan = 0; chan < instr->coord_components; ++chan)
               coords[chan] = LLVMBuildExtractValue(builder, coord,
                                                    chan, "");
         }
         for (unsigned chan = coord_vals; chan < 5; chan++)
            coords[chan] = coord_undef;

         break;
      }
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_comparator:
         sample_key |= LP_SAMPLER_SHADOW;
         coords[4] = get_src(bld_base, instr->src[i].src);
         coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32);
         break;
      case nir_tex_src_bias:
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_lod:
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         if (instr->op == nir_texop_txf)
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         else
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_ddx: {
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddx[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_ddy: {
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddy[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_offset: {
         int offset_cnt = instr->coord_components;
         if (instr->is_array)
            offset_cnt--;
         LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
         sample_key |= LP_SAMPLER_OFFSETS;
         if (offset_cnt == 1)
            offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
         else {
            for (unsigned chan = 0; chan < offset_cnt; ++chan) {
               offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
                                                     chan, "");
               offsets[chan] = cast_type(bld_base, offsets[chan], nir_type_int, 32);
            }
         }
         break;
      }
      case nir_tex_src_ms_index:
         sample_key |= LP_SAMPLER_FETCH_MS;
         ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;

      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      case nir_tex_src_sampler_offset:
         break;
      default:
         assert(0);
         break;
      }
   }
   if (!sampler_deref_instr)
      sampler_deref_instr = texture_deref_instr;

   if (explicit_lod)
      lod_property = lp_build_nir_lod_property(bld_base, instr->src[lod_src].src);

   if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
       instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
      for (unsigned chan = 0; chan < coord_vals; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
   else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32);

   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
      /* move layer coord for 1d arrays. */
      coords[2] = coords[1];
      coords[1] = coord_undef;
   }

   uint32_t samp_base_index = 0, tex_base_index = 0;
   if (!sampler_deref_instr) {
      int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
      if (samp_src_index == -1) {
         samp_base_index = instr->sampler_index;
      }
   }
   if (!texture_deref_instr) {
      int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
      if (tex_src_index == -1) {
         tex_base_index = instr->texture_index;
      }
   }

   if (instr->op == nir_texop_txd) {
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      params.derivs = &derivs;
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         else
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
      } else
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   }

   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
   params.sample_key = sample_key;
   params.offsets = offsets;
   params.texture_index = tex_base_index;
   params.texture_index_offset = texture_unit_offset;
   params.sampler_index = samp_base_index;
   params.coords = coords;
   params.texel = texel;
   params.lod = explicit_lod;
   params.ms_index = ms_index;
   params.aniso_filter_table = bld_base->aniso_filter_table;
   bld_base->tex(bld_base, &params);

   if (nir_dest_bit_size(instr->dest) != 32) {
      assert(nir_dest_bit_size(instr->dest) == 16);
      LLVMTypeRef vec_type = NULL;
      bool is_float = false;
      switch (nir_alu_type_get_base_type(instr->dest_type)) {
      case nir_type_float:
         is_float = true;
         break;
      case nir_type_int:
         vec_type = bld_base->int16_bld.vec_type;
         break;
      case nir_type_uint:
         vec_type = bld_base->uint16_bld.vec_type;
         break;
      default:
         unreachable("unexpected alu type");
      }
      for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         if (is_float) {
            texel[i] = lp_build_float_to_half(gallivm, texel[i]);
         } else {
            texel[i] = LLVMBuildBitCast(builder, texel[i], bld_base->int_bld.vec_type, "");
            texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, "");
         }
      }
   }

   assign_dest(bld_base, &instr->dest, texel);
}

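/* SSA undef: emit undef vectors of the matching bit size for each
 * destination component.
 */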
static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
                            const nir_ssa_undef_instr *instr)
{
   unsigned num_components = instr->def.num_components;
   LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
   struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size);
   for (unsigned i = 0; i < num_components; i++)
      undef[i] = LLVMGetUndef(undef_bld->vec_type);
   memset(&undef[num_components], 0,
          (NIR_MAX_VEC_COMPONENTS - num_components) * sizeof(LLVMValueRef));
   assign_ssa_dest(bld_base, &instr->def, undef);
}

static void visit_jump(struct lp_build_nir_context *bld_base,
                       const nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break:
      bld_base->break_stmt(bld_base);
      break;
   case nir_jump_continue:
      bld_base->continue_stmt(bld_base);
      break;
   default:
      unreachable("Unknown jump instr\n");
   }
}

static void visit_deref(struct lp_build_nir_context *bld_base,
                        nir_deref_instr *instr)
{
   if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared |
                                        nir_var_mem_global))
      return;
   LLVMValueRef result = NULL;
   switch(instr->deref_type) {
   case nir_deref_type_var: {
      struct hash_entry *entry = _mesa_hash_table_search(bld_base->vars, instr->var);
      result = entry->data;
      break;
   }
   default:
      unreachable("Unhandled deref_instr deref type");
   }

   assign_ssa(bld_base, instr->dest.ssa.index, result);
}

static void visit_block(struct lp_build_nir_context *bld_base, nir_block *block)
{
   nir_foreach_instr(instr, block)
   {
      switch (instr->type) {
      case nir_instr_type_alu:
         visit_alu(bld_base, nir_instr_as_alu(instr));
         break;
      case nir_instr_type_load_const:
         visit_load_const(bld_base, nir_instr_as_load_const(instr));
         break;
      case nir_instr_type_intrinsic:
         visit_intrinsic(bld_base, nir_instr_as_intrinsic(instr));
         break;
      case nir_instr_type_tex:
         visit_tex(bld_base, nir_instr_as_tex(instr));
         break;
      case nir_instr_type_phi:
         assert(0);
         break;
      case nir_instr_type_ssa_undef:
         visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(instr));
         break;
      case nir_instr_type_jump:
         visit_jump(bld_base, nir_instr_as_jump(instr));
         break;
      case nir_instr_type_deref:
         visit_deref(bld_base, nir_instr_as_deref(instr));
         break;
      default:
         fprintf(stderr, "Unknown NIR instr type: ");
         nir_print_instr(instr, stderr);
         fprintf(stderr, "\n");
         abort();
      }
   }
}

static void visit_if(struct lp_build_nir_context *bld_base, nir_if *if_stmt)
{
   LLVMValueRef cond = get_src(bld_base, if_stmt->condition);

   bld_base->if_cond(bld_base, cond);
   visit_cf_list(bld_base, &if_stmt->then_list);

   if (!exec_list_is_empty(&if_stmt->else_list)) {
      bld_base->else_stmt(bld_base);
      visit_cf_list(bld_base, &if_stmt->else_list);
   }
   bld_base->endif_stmt(bld_base);
}

static void visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
{
   bld_base->bgnloop(bld_base);
   visit_cf_list(bld_base, &loop->body);
   bld_base->endloop(bld_base);
}

static void visit_cf_list(struct lp_build_nir_context *bld_base,
                          struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list)
   {
      switch (node->type) {
      case nir_cf_node_block:
         visit_block(bld_base, nir_cf_node_as_block(node));
         break;

      case nir_cf_node_if:
         visit_if(bld_base, nir_cf_node_as_if(node));
         break;

      case nir_cf_node_loop:
         visit_loop(bld_base, nir_cf_node_as_loop(node));
         break;

      default:
         assert(0);
      }
   }
}

static void
handle_shader_output_decl(struct lp_build_nir_context *bld_base,
                          struct nir_shader *nir,
                          struct nir_variable *variable)
{
   bld_base->emit_var_decl(bld_base, variable);
}

/* Vector registers are stored as arrays on the LLVM side, so we can use
 * GEP on them; to do exec-mask stores we need to operate on single
 * components. The arrays are laid out as:
 *   0.x, 1.x, 2.x, 3.x
 *   0.y, 1.y, 2.y, 3.y
 *   ...
 */
static LLVMTypeRef get_register_type(struct lp_build_nir_context *bld_base,
                                     nir_register *reg)
{
   if (is_aos(bld_base))
      return bld_base->base.int_vec_type;

   struct lp_build_context *int_bld = get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size);

   LLVMTypeRef type = int_bld->vec_type;
   if (reg->num_array_elems)
      type = LLVMArrayType(type, reg->num_array_elems);
   if (reg->num_components > 1)
      type = LLVMArrayType(type, reg->num_components);

   return type;
}

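/* Top-level entry point: run the lowering passes needed by this backend
 * (phi webs out of SSA, locals to registers), declare shader outputs,
 * allocate storage for NIR registers, then translate the control-flow list.
 */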
bool lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
                       struct nir_shader *nir)
{
   struct nir_function *func;

   nir_convert_from_ssa(nir, true);
   nir_lower_locals_to_regs(nir);
   nir_remove_dead_derefs(nir);
   nir_remove_dead_variables(nir, nir_var_function_temp, NULL);

   if (is_aos(bld_base)) {
      nir_move_vec_src_uses_to_dest(nir);
      nir_lower_vec_to_movs(nir, NULL, NULL);
   }

   nir_foreach_shader_out_variable(variable, nir)
      handle_shader_output_decl(bld_base, nir, variable);

   if (nir->info.io_lowered) {
      uint64_t outputs_written = nir->info.outputs_written;

      while (outputs_written) {
         unsigned location = u_bit_scan64(&outputs_written);
         nir_variable var = {0};

         var.type = glsl_vec4_type();
         var.data.mode = nir_var_shader_out;
         var.data.location = location;
         var.data.driver_location = util_bitcount64(nir->info.outputs_written &
                                                    BITFIELD64_MASK(location));
         bld_base->emit_var_decl(bld_base, &var);
      }
   }

   bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);

   func = (struct nir_function *)exec_list_get_head(&nir->functions);

   nir_foreach_register(reg, &func->impl->registers) {
      LLVMTypeRef type = get_register_type(bld_base, reg);
      LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm,
                                               type, "reg");
      _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
   }
   nir_index_ssa_defs(func->impl);
   bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
   visit_cf_list(bld_base, &func->impl->body);

   free(bld_base->ssa_defs);
   ralloc_free(bld_base->vars);
   ralloc_free(bld_base->regs);
   ralloc_free(bld_base->range_ht);
   return true;
}

/* Run some basic NIR optimization passes to get rid of constructs the
 * LLVM backend doesn't want to see. */
void lp_build_opt_nir(struct nir_shader *nir)
{
   bool progress;

   static const struct nir_lower_tex_options lower_tex_options = {
      .lower_tg4_offsets = true,
      .lower_txp = ~0u,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(nir, nir_lower_frexp);

   NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
   NIR_PASS_V(nir, nir_lower_fp16_casts);
   do {
      progress = false;
      NIR_PASS(progress, nir, nir_opt_constant_folding);
      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_lower_pack);

      nir_lower_tex_options options = { 0, };
      NIR_PASS_V(nir, nir_lower_tex, &options);

      const nir_lower_subgroups_options subgroups_options = {
         .subgroup_size = lp_native_vector_width / 32,
         .ballot_bit_size = 32,
         .ballot_components = 1,
         .lower_to_scalar = true,
         .lower_subgroup_masks = true,
      };
      NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options);

   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, nir, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(nir, nir_copy_prop);
         NIR_PASS_V(nir, nir_opt_dce);
         NIR_PASS_V(nir, nir_opt_cse);
      }
   } while (progress);

   if (nir_lower_bool_to_int32(nir)) {
      NIR_PASS_V(nir, nir_copy_prop);
      NIR_PASS_V(nir, nir_opt_dce);
   }
}