1 /*
2  * Copyright © 2016 Bas Nieuwenhuizen
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "ac_nir_to_llvm.h"
25 #include "ac_gpu_info.h"
26 #include "ac_binary.h"
27 #include "ac_llvm_build.h"
28 #include "ac_llvm_util.h"
29 #include "ac_shader_abi.h"
30 #include "ac_shader_util.h"
31 #include "nir/nir.h"
32 #include "nir/nir_deref.h"
33 #include "sid.h"
34 #include "util/bitscan.h"
35 #include "util/u_math.h"
36 #include <llvm/Config/llvm-config.h>
37 
38 struct ac_nir_context {
39    struct ac_llvm_context ac;
40    struct ac_shader_abi *abi;
41    const struct ac_shader_args *args;
42 
43    gl_shader_stage stage;
44    shader_info *info;
45 
46    LLVMValueRef *ssa_defs;
47 
48    LLVMValueRef scratch;
49    LLVMValueRef constant_data;
50 
51    struct hash_table *defs;
52    struct hash_table *phis;
53    struct hash_table *vars;
54    struct hash_table *verified_interp;
55 
56    LLVMValueRef main_function;
57    LLVMBasicBlockRef continue_block;
58    LLVMBasicBlockRef break_block;
59 
60    LLVMValueRef vertex_id_replaced;
61    LLVMValueRef instance_id_replaced;
62    LLVMValueRef tes_u_replaced;
63    LLVMValueRef tes_v_replaced;
64    LLVMValueRef tes_rel_patch_id_replaced;
65    LLVMValueRef tes_patch_id_replaced;
66 };
67 
68 static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
69                                            const nir_instr *instr, bool image);
70 
71 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
72                                      enum ac_descriptor_type desc_type, const nir_instr *instr,
73                                      LLVMValueRef index, bool image, bool write);
74 
get_def_type(struct ac_nir_context * ctx,const nir_ssa_def * def)75 static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_ssa_def *def)
76 {
77    LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
78    if (def->num_components > 1) {
79       type = LLVMVectorType(type, def->num_components);
80    }
81    return type;
82 }
83 
get_src(struct ac_nir_context * nir,nir_src src)84 static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
85 {
86    assert(src.is_ssa);
87    return nir->ssa_defs[src.ssa->index];
88 }
89 
/* Turn a NIR byte offset (src) plus a constant byte offset (c_off) into a
 * typed pointer into LDS.
 *
 * The offset is added in i32, GEP'd into the LDS base viewed as i8 (so the
 * arithmetic is in bytes), and the result is bitcast to a pointer to an
 * integer of bit_size bits in the same address space.
 */
static LLVMValueRef get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size, unsigned c_off)
{
   LLVMValueRef ptr = get_src(ctx, src);
   LLVMValueRef lds_i8 = ctx->ac.lds;
   /* NOTE(review): non-compute stages re-view the LDS base as i8* in the LDS
    * address space here — presumably their lds variable is declared with a
    * different pointee type; confirm against where ctx->ac.lds is set up. */
   if (ctx->stage != MESA_SHADER_COMPUTE)
      lds_i8 = LLVMBuildBitCast(ctx->ac.builder, ctx->ac.lds, LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS), "");

   ptr = LLVMBuildAdd(ctx->ac.builder, ptr, LLVMConstInt(ctx->ac.i32, c_off, 0), "");
   ptr = LLVMBuildGEP(ctx->ac.builder, lds_i8, &ptr, 1, "");
   /* Preserve whatever address space the GEP produced in the final cast. */
   int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));

   LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size);

   return LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(type, addr_space), "");
}
105 
get_block(struct ac_nir_context * nir,const struct nir_block * b)106 static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, const struct nir_block *b)
107 {
108    struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
109    return (LLVMBasicBlockRef)entry->data;
110 }
111 
/* Fetch a NIR ALU source and apply its swizzle, producing a value with
 * exactly num_components components.
 *
 * Three shapes are handled: extracting one lane of a vector, splatting a
 * scalar into a vector, and a general shuffle. negate/abs source modifiers
 * are expected to have been lowered away (asserted at the end).
 */
static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, nir_alu_src src,
                                unsigned num_components)
{
   LLVMValueRef value = get_src(ctx, src.src);
   bool need_swizzle = false;

   assert(value);
   unsigned src_components = ac_get_llvm_num_components(value);
   /* A swizzle is only needed when some used component isn't an identity
    * mapping. */
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      LLVMValueRef masks[] = {LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
                              LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
                              LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
                              LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};

      if (src_components > 1 && num_components == 1) {
         /* Vector -> scalar: extract the single selected lane. */
         value = LLVMBuildExtractElement(ctx->ac.builder, value, masks[0], "");
      } else if (src_components == 1 && num_components > 1) {
         /* Scalar -> vector: replicate the one component. */
         LLVMValueRef values[] = {value, value, value, value};
         value = ac_build_gather_values(&ctx->ac, values, num_components);
      } else {
         /* General case: shuffle by the swizzle mask. */
         LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
         value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, swizzle, "");
      }
   }
   assert(!src.negate);
   assert(!src.abs);
   return value;
}
146 
/* Build an integer comparison, coercing both operands to integer form first. */
static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, LLVMIntPredicate pred,
                                 LLVMValueRef src0, LLVMValueRef src1)
{
   LLVMValueRef lhs = ac_to_integer(ctx, src0);
   LLVMValueRef rhs = ac_to_integer(ctx, src1);

   return LLVMBuildICmp(ctx->builder, pred, lhs, rhs, "");
}
154 
/* Build a floating-point comparison, coercing both operands to float form first. */
static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, LLVMRealPredicate pred,
                                   LLVMValueRef src0, LLVMValueRef src1)
{
   LLVMValueRef lhs = ac_to_float(ctx, src0);
   LLVMValueRef rhs = ac_to_float(ctx, src1);

   return LLVMBuildFCmp(ctx->builder, pred, lhs, rhs, "");
}
162 
/* Call a 1-operand float intrinsic, appending the overloaded type suffix
 * (e.g. "intrin.f32") derived from the operand's float type. */
static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, const char *intrin,
                                         LLVMTypeRef result_type, LLVMValueRef src0)
{
   char type_name[64];
   char full_name[64];
   LLVMValueRef arg = ac_to_float(ctx, src0);

   ac_build_type_name_for_intr(LLVMTypeOf(arg), type_name, sizeof(type_name));
   ASSERTED const int written = snprintf(full_name, sizeof(full_name), "%s.%s", intrin, type_name);
   assert(written < sizeof(full_name));

   return ac_build_intrinsic(ctx, full_name, result_type, &arg, 1, AC_FUNC_ATTR_READNONE);
}
176 
/* Like emit_intrin_1f_param, but for intrinsics that only exist in scalar
 * form: a vector result is built element-by-element. */
static LLVMValueRef emit_intrin_1f_param_scalar(struct ac_llvm_context *ctx, const char *intrin,
                                                LLVMTypeRef result_type, LLVMValueRef src0)
{
   /* Non-vector results can go straight through the plain 1-parameter path. */
   if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
      return emit_intrin_1f_param(ctx, intrin, result_type, src0);

   LLVMTypeRef elem_type = LLVMGetElementType(result_type);
   LLVMValueRef vec = LLVMGetUndef(result_type);

   /* Scalarize: one intrinsic call per element, reassembled with inserts. */
   for (unsigned elem = 0; elem < LLVMGetVectorSize(result_type); elem++) {
      char full_name[64], type_name[64];
      LLVMValueRef arg = ac_to_float(ctx, ac_llvm_extract_elem(ctx, src0, elem));

      ac_build_type_name_for_intr(LLVMTypeOf(arg), type_name, sizeof(type_name));
      ASSERTED const int written =
         snprintf(full_name, sizeof(full_name), "%s.%s", intrin, type_name);
      assert(written < sizeof(full_name));

      LLVMValueRef call =
         ac_build_intrinsic(ctx, full_name, elem_type, &arg, 1, AC_FUNC_ATTR_READNONE);
      vec = LLVMBuildInsertElement(ctx->builder, vec, call,
                                   LLVMConstInt(ctx->i32, elem, 0), "");
   }
   return vec;
}
203 
/* Call a 2-operand float intrinsic with the type-suffixed overload name. */
static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, const char *intrin,
                                         LLVMTypeRef result_type, LLVMValueRef src0,
                                         LLVMValueRef src1)
{
   char type_name[64];
   char full_name[64];
   LLVMValueRef args[2];

   args[0] = ac_to_float(ctx, src0);
   args[1] = ac_to_float(ctx, src1);

   ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type_name, sizeof(type_name));
   ASSERTED const int written = snprintf(full_name, sizeof(full_name), "%s.%s", intrin, type_name);
   assert(written < sizeof(full_name));

   return ac_build_intrinsic(ctx, full_name, result_type, args, 2, AC_FUNC_ATTR_READNONE);
}
219 
/* Call a 3-operand float intrinsic with the type-suffixed overload name. */
static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, const char *intrin,
                                         LLVMTypeRef result_type, LLVMValueRef src0,
                                         LLVMValueRef src1, LLVMValueRef src2)
{
   char type_name[64];
   char full_name[64];
   LLVMValueRef args[3];

   args[0] = ac_to_float(ctx, src0);
   args[1] = ac_to_float(ctx, src1);
   args[2] = ac_to_float(ctx, src2);

   ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type_name, sizeof(type_name));
   ASSERTED const int written = snprintf(full_name, sizeof(full_name), "%s.%s", intrin, type_name);
   assert(written < sizeof(full_name));

   return ac_build_intrinsic(ctx, full_name, result_type, args, 3, AC_FUNC_ATTR_READNONE);
}
236 
/* NIR bcsel: select src1 when the i1 condition src0 is true, else src2.
 *
 * src1 and src2 may arrive with mismatched pointer/integer representations;
 * the integer side is converted to the other side's pointer type first so
 * that LLVMBuildSelect sees operands of one type.
 */
static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1,
                               LLVMValueRef src2)
{
   LLVMTypeRef src1_type = LLVMTypeOf(src1);
   LLVMTypeRef src2_type = LLVMTypeOf(src2);

   if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
       LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
      src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
   } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
              LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
      src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
   }

   return LLVMBuildSelect(ctx->builder, src0, ac_to_integer_or_pointer(ctx, src1),
                          ac_to_integer_or_pointer(ctx, src2), "");
}
254 
/* Integer absolute value computed as max(x, -x). */
static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
   LLVMValueRef negated = LLVMBuildNeg(ctx->builder, src0, "");
   return ac_build_imax(ctx, src0, negated);
}
259 
/* Emit an add/sub-with-overflow style intrinsic (named by intrin) that
 * returns { i32 result, i1 overflow }, and hand back only the overflow
 * (carry/borrow) flag zero-extended to i32, i.e. 0 or 1.
 */
static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, const char *intrin,
                                    LLVMValueRef src0, LLVMValueRef src1)
{
   LLVMTypeRef ret_type;
   LLVMTypeRef types[] = {ctx->i32, ctx->i1};
   LLVMValueRef res;
   LLVMValueRef params[] = {src0, src1};
   /* Packed struct matching the intrinsic's { i32, i1 } return. */
   ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true);

   res = ac_build_intrinsic(ctx, intrin, ret_type, params, 2, AC_FUNC_ATTR_READNONE);

   /* Field 1 is the overflow bit; the arithmetic result is discarded. */
   res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
   res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
   return res;
}
275 
/* Convert a 1-bit boolean to a float of the given bit size:
 * true -> 1.0, false -> 0.0.
 *
 * The 16-bit case additionally supports a 2-component boolean vector,
 * converted component-wise.
 */
static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
{
   assert(ac_get_elem_bits(ctx, LLVMTypeOf(src0)) == 1);

   switch (bitsize) {
   case 16:
      if (LLVMGetTypeKind(LLVMTypeOf(src0)) == LLVMVectorTypeKind) {
         assert(LLVMGetVectorSize(LLVMTypeOf(src0)) == 2);
         LLVMValueRef f[] = {
            LLVMBuildSelect(ctx->builder, ac_llvm_extract_elem(ctx, src0, 0),
                            ctx->f16_1, ctx->f16_0, ""),
            LLVMBuildSelect(ctx->builder, ac_llvm_extract_elem(ctx, src0, 1),
                            ctx->f16_1, ctx->f16_0, ""),
         };
         return ac_build_gather_values(ctx, f, 2);
      }
      return LLVMBuildSelect(ctx->builder, src0, ctx->f16_1, ctx->f16_0, "");
   case 32:
      return LLVMBuildSelect(ctx->builder, src0, ctx->f32_1, ctx->f32_0, "");
   case 64:
      return LLVMBuildSelect(ctx->builder, src0, ctx->f64_1, ctx->f64_0, "");
   default:
      unreachable("Unsupported bit size.");
   }
}
301 
/* float -> bool: true iff the value is not equal to 0.0 (UNE, so NaN -> true). */
static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
   LLVMValueRef val = ac_to_float(ctx, src0);
   return LLVMBuildFCmp(ctx->builder, LLVMRealUNE, val, LLVMConstNull(LLVMTypeOf(val)), "");
}
308 
/* bool -> integer of the given bit size: true -> 1, false -> 0. */
static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
{
   LLVMValueRef one, zero;

   switch (bitsize) {
   case 8:
      one = ctx->i8_1;
      zero = ctx->i8_0;
      break;
   case 16:
      one = ctx->i16_1;
      zero = ctx->i16_0;
      break;
   case 32:
      one = ctx->i32_1;
      zero = ctx->i32_0;
      break;
   case 64:
      one = ctx->i64_1;
      zero = ctx->i64_0;
      break;
   default:
      unreachable("Unsupported bit size.");
   }

   return LLVMBuildSelect(ctx->builder, src0, one, zero, "");
}
324 
/* integer -> bool: true iff the value is non-zero. */
static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
   return LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
                        LLVMConstNull(LLVMTypeOf(src0)), "");
}
330 
/* Quantize an f32 to half precision: truncate to f16, flush f16 denormals to
 * zero, and return the value widened back to f32.
 *
 * On GFX8+ denormals are detected with the hardware class instruction
 * (llvm.amdgcn.class.f16); on GFX6-GFX7 they are detected by comparing the
 * magnitude against the smallest normal half (2^-14).
 */
static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
   LLVMValueRef result;
   LLVMValueRef cond = NULL;

   src0 = ac_to_float(ctx, src0);
   result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");

   if (ctx->chip_class >= GFX8) {
      LLVMValueRef args[2];
      /* Check if the result is a denormal - and flush to 0 if so. */
      args[0] = result;
      args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
      cond =
         ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
   }

   /* need to convert back up to f32 */
   result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");

   if (ctx->chip_class >= GFX8)
      result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
   else {
      /* for GFX6-GFX7 */
      /* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
       * so compare the result and flush to 0 if it's smaller.
       */
      LLVMValueRef temp, cond2;
      temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
      cond = LLVMBuildFCmp(
         ctx->builder, LLVMRealOGT,
         LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
         temp, "");
      /* Don't flush an exact zero, only true denormals. */
      cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealONE, temp, ctx->f32_0, "");
      cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
      result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
   }
   return result;
}
370 
/* High 32 bits of an unsigned 32x32 -> 64-bit multiply. */
static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
                                   LLVMValueRef src1)
{
   LLVMValueRef a = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
   LLVMValueRef b = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");

   LLVMValueRef product = LLVMBuildMul(ctx->builder, a, b, "");
   LLVMValueRef high =
      LLVMBuildLShr(ctx->builder, product, LLVMConstInt(ctx->i64, 32, false), "");

   return LLVMBuildTrunc(ctx->builder, high, ctx->i32, "");
}
383 
/* High 32 bits of a signed 32x32 -> 64-bit multiply. */
static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
                                   LLVMValueRef src1)
{
   LLVMValueRef a = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
   LLVMValueRef b = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");

   LLVMValueRef product = LLVMBuildMul(ctx->builder, a, b, "");
   LLVMValueRef high =
      LLVMBuildAShr(ctx->builder, product, LLVMConstInt(ctx->i64, 32, false), "");

   return LLVMBuildTrunc(ctx->builder, high, ctx->i32, "");
}
396 
/* Bitfield mask: ((1 << bits) - 1) << offset. */
static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx, LLVMValueRef bits, LLVMValueRef offset)
{
   LLVMValueRef one_shl = LLVMBuildShl(ctx->builder, ctx->i32_1, bits, "");
   LLVMValueRef low_mask = LLVMBuildSub(ctx->builder, one_shl, ctx->i32_1, "");

   return LLVMBuildShl(ctx->builder, low_mask, offset, "");
}
405 
/* Bitfield select:
 *   (mask & insert) | (~mask & base)
 * rewritten as base ^ (mask & (insert ^ base)), which the LLVM backend can
 * convert to V_BFI. */
static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx, LLVMValueRef mask,
                                         LLVMValueRef insert, LLVMValueRef base)
{
   LLVMValueRef diff = LLVMBuildXor(ctx->builder, insert, base, "");
   LLVMValueRef masked_diff = LLVMBuildAnd(ctx->builder, mask, diff, "");

   return LLVMBuildXor(ctx->builder, base, masked_diff, "");
}
417 
/* Extract the two components of a float vector and pack them into a single
 * i32 via the supplied pack callback. */
static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0,
                                   LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
                                                        LLVMValueRef args[2]))
{
   LLVMValueRef elems[2];
   LLVMValueRef vec = ac_to_float(ctx, src0);

   elems[0] = LLVMBuildExtractElement(ctx->builder, vec, ctx->i32_0, "");
   elems[1] = LLVMBuildExtractElement(ctx->builder, vec, ctx->i32_1, "");

   return LLVMBuildBitCast(ctx->builder, pack(ctx, elems), ctx->i32, "");
}
430 
/* Unpack two half floats stored in the low/high 16 bits of an i32 into a
 * 2-component f32 vector. */
static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
   LLVMValueRef shift16 = LLVMConstInt(ctx->i32, 16, false);
   LLVMValueRef halves[2];

   for (unsigned chan = 0; chan < 2; chan++) {
      /* Channel 1 lives in the upper 16 bits. */
      LLVMValueRef word = chan == 1 ? LLVMBuildLShr(ctx->builder, src0, shift16, "") : src0;
      word = LLVMBuildTrunc(ctx->builder, word, ctx->i16, "");
      word = LLVMBuildBitCast(ctx->builder, word, ctx->f16, "");
      halves[chan] = LLVMBuildFPExt(ctx->builder, word, ctx->f32, "");
   }

   return ac_build_gather_values(ctx, halves, 2);
}
445 
/* Emit a screen-space derivative (DDX/DDY, fine or coarse) via ac_build_ddxy. */
static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, nir_op op, LLVMValueRef src0)
{
   unsigned mask;

   switch (op) {
   case nir_op_fddx_fine:
      mask = AC_TID_MASK_LEFT;
      break;
   case nir_op_fddy_fine:
      mask = AC_TID_MASK_TOP;
      break;
   default:
      mask = AC_TID_MASK_TOP_LEFT;
      break;
   }

   /* For DDX we want the next pixel in X (idx 1), for DDY the next in Y (idx 2). */
   const bool is_ddx = op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx;
   const int idx = is_ddx ? 1 : 2;

   return ac_build_ddxy(&ctx->ac, mask, idx, src0);
}
468 
/* State shared between enter_waterfall() and exit_waterfall(). */
struct waterfall_context {
   /* Incoming blocks for the phis built at loop exit:
    * [0] is recorded in enter_waterfall(), [1] in exit_waterfall(). */
   LLVMBasicBlockRef phi_bb[2];
   /* Whether a waterfall loop was actually emitted (false for uniform values). */
   bool use_waterfall;
};
473 
474 /* To deal with divergent descriptors we can create a loop that handles all
475  * lanes with the same descriptor on a given iteration (henceforth a
476  * waterfall loop).
477  *
 * These helpers create the begin and end of the loop, leaving the caller
 * to implement the body.
 *
 * params:
 *  - ctx is the usual nir context
483  *  - wctx is a temporary struct containing some loop info. Can be left uninitialized.
484  *  - value is the possibly divergent value for which we built the loop
485  *  - divergent is whether value is actually divergent. If false we just pass
486  *     things through.
487  */
/* Open a waterfall loop around a possibly-divergent value and return a
 * uniform copy of it for use inside the loop body. Must be paired with
 * exit_waterfall() using the same wctx. */
static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
                                    LLVMValueRef value, bool divergent)
{
   /* If the app claims the value is divergent but it is constant we can
    * end up with a dynamic index of NULL. */
   if (!value)
      divergent = false;

   wctx->use_waterfall = divergent;
   if (!divergent)
      return value;

   ac_build_bgnloop(&ctx->ac, 6000);

   /* Read back a uniform copy of each component (readlane) and compute which
    * lanes match it on every component. */
   LLVMValueRef active = LLVMConstInt(ctx->ac.i1, 1, false);
   LLVMValueRef scalar_value[NIR_MAX_VEC_COMPONENTS];

   for (unsigned i = 0; i < ac_get_llvm_num_components(value); i++) {
      LLVMValueRef comp = ac_llvm_extract_elem(&ctx->ac, value, i);
      scalar_value[i] = ac_build_readlane(&ctx->ac, comp, NULL);
      active = LLVMBuildAnd(ctx->ac.builder, active,
                            LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, comp, scalar_value[i], ""), "");
   }

   /* Remember the block that will feed the phis in exit_waterfall(), then
    * open the "matching lanes" branch that the caller fills with the body. */
   wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
   ac_build_ifcc(&ctx->ac, active, 6001);

   return ac_build_gather_values(&ctx->ac, scalar_value, ac_get_llvm_num_components(value));
}
517 
/* Close a waterfall loop opened by enter_waterfall().
 *
 * value is an optional per-iteration result (may be NULL); when the loop was
 * actually emitted it is routed out through a phi and returned, otherwise
 * value is returned unchanged. */
static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
                                   LLVMValueRef value)
{
   LLVMValueRef ret = NULL;
   LLVMValueRef phi_src[2];
   /* 0 when arriving from the loop header (keep looping), ~0 when arriving
    * from the body (all matching lanes done -> break). */
   LLVMValueRef cc_phi_src[2] = {
      LLVMConstInt(ctx->ac.i32, 0, false),
      LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
   };

   if (!wctx->use_waterfall)
      return value;

   wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);

   ac_build_endif(&ctx->ac, 6001);

   if (value) {
      phi_src[0] = LLVMGetUndef(LLVMTypeOf(value));
      phi_src[1] = value;

      ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb);
   }

   /*
    * By using the optimization barrier on the exit decision, we decouple
    * the operations from the break, and hence avoid LLVM hoisting the
    * operation into the break block.
    */
   LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
   ac_build_optimization_barrier(&ctx->ac, &cc, false);

   LLVMValueRef active =
      LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");
   ac_build_ifcc(&ctx->ac, active, 6002);
   ac_build_break(&ctx->ac);
   ac_build_endif(&ctx->ac, 6002);

   ac_build_endloop(&ctx->ac, 6000);
   return ret;
}
559 
visit_alu(struct ac_nir_context * ctx,const nir_alu_instr * instr)560 static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
561 {
562    LLVMValueRef src[16], result = NULL;
563    unsigned num_components = instr->dest.dest.ssa.num_components;
564    unsigned src_components;
565    LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
566 
567    assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
568    switch (instr->op) {
569    case nir_op_vec2:
570    case nir_op_vec3:
571    case nir_op_vec4:
572    case nir_op_vec5:
573    case nir_op_vec8:
574    case nir_op_vec16:
575    case nir_op_unpack_32_2x16:
576    case nir_op_unpack_64_2x32:
577    case nir_op_unpack_64_4x16:
578       src_components = 1;
579       break;
580    case nir_op_pack_half_2x16:
581    case nir_op_pack_snorm_2x16:
582    case nir_op_pack_unorm_2x16:
583    case nir_op_pack_uint_2x16:
584    case nir_op_pack_sint_2x16:
585    case nir_op_pack_32_2x16:
586    case nir_op_pack_64_2x32:
587       src_components = 2;
588       break;
589    case nir_op_unpack_half_2x16:
590       src_components = 1;
591       break;
592    case nir_op_cube_face_coord_amd:
593    case nir_op_cube_face_index_amd:
594       src_components = 3;
595       break;
596    case nir_op_pack_32_4x8:
597    case nir_op_pack_64_4x16:
598       src_components = 4;
599       break;
600    default:
601       src_components = num_components;
602       break;
603    }
604    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
605       src[i] = get_alu_src(ctx, instr->src[i], src_components);
606 
607    switch (instr->op) {
608    case nir_op_mov:
609       result = src[0];
610       break;
611    case nir_op_fneg:
612       src[0] = ac_to_float(&ctx->ac, src[0]);
613       result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
614       if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
615          /* fneg will be optimized by backend compiler with sign
616           * bit removed via XOR. This is probably a LLVM bug.
617           */
618          result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
619       }
620       break;
621    case nir_op_ineg:
622       if (instr->no_unsigned_wrap)
623          result = LLVMBuildNUWNeg(ctx->ac.builder, src[0], "");
624       else if (instr->no_signed_wrap)
625          result = LLVMBuildNSWNeg(ctx->ac.builder, src[0], "");
626       else
627          result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
628       break;
629    case nir_op_inot:
630       result = LLVMBuildNot(ctx->ac.builder, src[0], "");
631       break;
632    case nir_op_iadd:
633       if (instr->no_unsigned_wrap)
634          result = LLVMBuildNUWAdd(ctx->ac.builder, src[0], src[1], "");
635       else if (instr->no_signed_wrap)
636          result = LLVMBuildNSWAdd(ctx->ac.builder, src[0], src[1], "");
637       else
638          result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
639       break;
640    case nir_op_uadd_sat:
641    case nir_op_iadd_sat: {
642       char name[64], type[64];
643       ac_build_type_name_for_intr(def_type, type, sizeof(type));
644       snprintf(name, sizeof(name), "llvm.%cadd.sat.%s",
645                instr->op == nir_op_uadd_sat ? 'u' : 's', type);
646       result = ac_build_intrinsic(&ctx->ac, name, def_type, src, 2, AC_FUNC_ATTR_READNONE);
647       break;
648    }
649    case nir_op_fadd:
650       src[0] = ac_to_float(&ctx->ac, src[0]);
651       src[1] = ac_to_float(&ctx->ac, src[1]);
652       result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
653       break;
654    case nir_op_fsub:
655       src[0] = ac_to_float(&ctx->ac, src[0]);
656       src[1] = ac_to_float(&ctx->ac, src[1]);
657       result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
658       break;
659    case nir_op_isub:
660       if (instr->no_unsigned_wrap)
661          result = LLVMBuildNUWSub(ctx->ac.builder, src[0], src[1], "");
662       else if (instr->no_signed_wrap)
663          result = LLVMBuildNSWSub(ctx->ac.builder, src[0], src[1], "");
664       else
665          result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
666       break;
667    case nir_op_imul:
668       if (instr->no_unsigned_wrap)
669          result = LLVMBuildNUWMul(ctx->ac.builder, src[0], src[1], "");
670       else if (instr->no_signed_wrap)
671          result = LLVMBuildNSWMul(ctx->ac.builder, src[0], src[1], "");
672       else
673          result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
674       break;
675    case nir_op_imod:
676       result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
677       break;
678    case nir_op_umod:
679       result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
680       break;
681    case nir_op_irem:
682       result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
683       break;
684    case nir_op_idiv:
685       result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
686       break;
687    case nir_op_udiv:
688       result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
689       break;
690    case nir_op_fmul:
691       src[0] = ac_to_float(&ctx->ac, src[0]);
692       src[1] = ac_to_float(&ctx->ac, src[1]);
693       result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
694       break;
695    case nir_op_fmulz:
696       assert(LLVM_VERSION_MAJOR >= 12);
697       src[0] = ac_to_float(&ctx->ac, src[0]);
698       src[1] = ac_to_float(&ctx->ac, src[1]);
699       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
700                                   src, 2, AC_FUNC_ATTR_READNONE);
701       break;
702    case nir_op_frcp:
703       /* For doubles, we need precise division to pass GLCTS. */
704       if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && ac_get_type_size(def_type) == 8) {
705          result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1, ac_to_float(&ctx->ac, src[0]), "");
706       } else {
707          result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rcp",
708                                               ac_to_float_type(&ctx->ac, def_type), src[0]);
709       }
710       if (ctx->abi->clamp_div_by_zero)
711          result = ac_build_fmin(&ctx->ac, result,
712                                 LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
713       break;
714    case nir_op_iand:
715       result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
716       break;
717    case nir_op_ior:
718       result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
719       break;
720    case nir_op_ixor:
721       result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
722       break;
723    case nir_op_ishl:
724       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
725           ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
726          src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
727       else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
728                ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
729          src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
730       result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
731       break;
732    case nir_op_ishr:
733       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
734           ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
735          src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
736       else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
737                ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
738          src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
739       result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
740       break;
741    case nir_op_ushr:
742       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
743           ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
744          src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
745       else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
746                ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
747          src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
748       result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
749       break;
750    case nir_op_ilt:
751       result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
752       break;
753    case nir_op_ine:
754       result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
755       break;
756    case nir_op_ieq:
757       result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
758       break;
759    case nir_op_ige:
760       result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
761       break;
762    case nir_op_ult:
763       result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
764       break;
765    case nir_op_uge:
766       result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
767       break;
768    case nir_op_feq:
769       result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
770       break;
771    case nir_op_fneu:
772       result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
773       break;
774    case nir_op_flt:
775       result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
776       break;
777    case nir_op_fge:
778       result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
779       break;
780    case nir_op_fabs:
781       result =
782          emit_intrin_1f_param(&ctx->ac, "llvm.fabs", ac_to_float_type(&ctx->ac, def_type), src[0]);
783       if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
784          /* fabs will be optimized by backend compiler with sign
785           * bit removed via AND.
786           */
787          result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
788       }
789       break;
790    case nir_op_fsat:
791       src[0] = ac_to_float(&ctx->ac, src[0]);
792       result = ac_build_fsat(&ctx->ac, src[0],
793                              ac_to_float_type(&ctx->ac, def_type));
794       break;
795    case nir_op_iabs:
796       result = emit_iabs(&ctx->ac, src[0]);
797       break;
798    case nir_op_imax:
799       result = ac_build_imax(&ctx->ac, src[0], src[1]);
800       break;
801    case nir_op_imin:
802       result = ac_build_imin(&ctx->ac, src[0], src[1]);
803       break;
804    case nir_op_umax:
805       result = ac_build_umax(&ctx->ac, src[0], src[1]);
806       break;
807    case nir_op_umin:
808       result = ac_build_umin(&ctx->ac, src[0], src[1]);
809       break;
810    case nir_op_isign:
811       result = ac_build_isign(&ctx->ac, src[0]);
812       break;
813    case nir_op_fsign:
814       src[0] = ac_to_float(&ctx->ac, src[0]);
815       result = ac_build_fsign(&ctx->ac, src[0]);
816       break;
817    case nir_op_ffloor:
818       result =
819          emit_intrin_1f_param(&ctx->ac, "llvm.floor", ac_to_float_type(&ctx->ac, def_type), src[0]);
820       break;
821    case nir_op_ftrunc:
822       result =
823          emit_intrin_1f_param(&ctx->ac, "llvm.trunc", ac_to_float_type(&ctx->ac, def_type), src[0]);
824       break;
825    case nir_op_fceil:
826       result =
827          emit_intrin_1f_param(&ctx->ac, "llvm.ceil", ac_to_float_type(&ctx->ac, def_type), src[0]);
828       break;
829    case nir_op_fround_even:
830       result =
831          emit_intrin_1f_param(&ctx->ac, "llvm.rint", ac_to_float_type(&ctx->ac, def_type), src[0]);
832       break;
833    case nir_op_ffract:
834       result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
835                                            ac_to_float_type(&ctx->ac, def_type), src[0]);
836       break;
837    case nir_op_fsin:
838       result =
839          emit_intrin_1f_param(&ctx->ac, "llvm.sin", ac_to_float_type(&ctx->ac, def_type), src[0]);
840       break;
841    case nir_op_fcos:
842       result =
843          emit_intrin_1f_param(&ctx->ac, "llvm.cos", ac_to_float_type(&ctx->ac, def_type), src[0]);
844       break;
845    case nir_op_fsqrt:
846       result =
847          emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", ac_to_float_type(&ctx->ac, def_type), src[0]);
848       break;
849    case nir_op_fexp2:
850       result =
851          emit_intrin_1f_param(&ctx->ac, "llvm.exp2", ac_to_float_type(&ctx->ac, def_type), src[0]);
852       break;
853    case nir_op_flog2:
854       result =
855          emit_intrin_1f_param(&ctx->ac, "llvm.log2", ac_to_float_type(&ctx->ac, def_type), src[0]);
856       break;
857    case nir_op_frsq:
858       result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rsq",
859                                            ac_to_float_type(&ctx->ac, def_type), src[0]);
860       if (ctx->abi->clamp_div_by_zero)
861          result = ac_build_fmin(&ctx->ac, result,
862                                 LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
863       break;
864    case nir_op_frexp_exp:
865       src[0] = ac_to_float(&ctx->ac, src[0]);
866       result = ac_build_frexp_exp(&ctx->ac, src[0], ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
867       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
868          result = LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, "");
869       break;
870    case nir_op_frexp_sig:
871       src[0] = ac_to_float(&ctx->ac, src[0]);
872       result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
873       break;
874    case nir_op_fpow:
875       if (instr->dest.dest.ssa.bit_size != 32) {
876          /* 16 and 64 bits */
877          result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
878                                        ac_to_float_type(&ctx->ac, def_type), src[0]);
879          result = LLVMBuildFMul(ctx->ac.builder, result, ac_to_float(&ctx->ac, src[1]), "");
880          result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
881                                        ac_to_float_type(&ctx->ac, def_type), result);
882          break;
883       }
884       if (LLVM_VERSION_MAJOR >= 12) {
885          result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
886                                        ac_to_float_type(&ctx->ac, def_type), src[0]);
887          result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
888                                      (LLVMValueRef[]){result, ac_to_float(&ctx->ac, src[1])},
889                                      2, AC_FUNC_ATTR_READNONE);
890          result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
891                                        ac_to_float_type(&ctx->ac, def_type), result);
892          break;
893       }
894       /* Older LLVM doesn't have fmul.legacy. */
895       result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type),
896                                     src[0], src[1]);
897       break;
898    case nir_op_fmax:
899       result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
900                                     src[0], src[1]);
901       if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
902          /* Only pre-GFX9 chips do not flush denorms. */
903          result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
904       }
905       break;
906    case nir_op_fmin:
907       result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", ac_to_float_type(&ctx->ac, def_type),
908                                     src[0], src[1]);
909       if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
910          /* Only pre-GFX9 chips do not flush denorms. */
911          result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
912       }
913       break;
914    case nir_op_ffma:
915       /* FMA is slow on gfx6-8, so it shouldn't be used. */
916       assert(instr->dest.dest.ssa.bit_size != 32 || ctx->ac.chip_class >= GFX9);
917       result = emit_intrin_3f_param(&ctx->ac, "llvm.fma", ac_to_float_type(&ctx->ac, def_type),
918                                     src[0], src[1], src[2]);
919       break;
920    case nir_op_ffmaz:
921       assert(LLVM_VERSION_MAJOR >= 12 && ctx->ac.chip_class >= GFX10_3);
922       src[0] = ac_to_float(&ctx->ac, src[0]);
923       src[1] = ac_to_float(&ctx->ac, src[1]);
924       src[2] = ac_to_float(&ctx->ac, src[2]);
925       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fma.legacy", ctx->ac.f32,
926                                   src, 3, AC_FUNC_ATTR_READNONE);
927       break;
928    case nir_op_ldexp:
929       src[0] = ac_to_float(&ctx->ac, src[0]);
930       if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
931          result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2,
932                                      AC_FUNC_ATTR_READNONE);
933       else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
934          result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2,
935                                      AC_FUNC_ATTR_READNONE);
936       else
937          result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2,
938                                      AC_FUNC_ATTR_READNONE);
939       break;
940    case nir_op_bfm:
941       result = emit_bfm(&ctx->ac, src[0], src[1]);
942       break;
943    case nir_op_bitfield_select:
944       result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
945       break;
946    case nir_op_ubfe:
947       result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
948       break;
949    case nir_op_ibfe:
950       result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
951       break;
952    case nir_op_bitfield_reverse:
953       result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
954       break;
955    case nir_op_bit_count:
956       result = ac_build_bit_count(&ctx->ac, src[0]);
957       break;
958    case nir_op_vec2:
959    case nir_op_vec3:
960    case nir_op_vec4:
961    case nir_op_vec5:
962    case nir_op_vec8:
963    case nir_op_vec16:
964       for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
965          src[i] = ac_to_integer(&ctx->ac, src[i]);
966       result = ac_build_gather_values(&ctx->ac, src, num_components);
967       break;
968    case nir_op_f2i8:
969    case nir_op_f2i16:
970    case nir_op_f2imp:
971    case nir_op_f2i32:
972    case nir_op_f2i64:
973       src[0] = ac_to_float(&ctx->ac, src[0]);
974       result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
975       break;
976    case nir_op_f2u8:
977    case nir_op_f2u16:
978    case nir_op_f2ump:
979    case nir_op_f2u32:
980    case nir_op_f2u64:
981       src[0] = ac_to_float(&ctx->ac, src[0]);
982       result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
983       break;
984    case nir_op_i2f16:
985    case nir_op_i2fmp:
986    case nir_op_i2f32:
987    case nir_op_i2f64:
988       result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
989       break;
990    case nir_op_u2f16:
991    case nir_op_u2fmp:
992    case nir_op_u2f32:
993    case nir_op_u2f64:
994       result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
995       break;
996    case nir_op_f2f16_rtz:
997    case nir_op_f2f16:
998    case nir_op_f2fmp:
999       src[0] = ac_to_float(&ctx->ac, src[0]);
1000 
1001       /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
1002        * all f32->f16 conversions have to round towards zero, because both scalar
1003        * and vec2 down-conversions have to round equally.
1004        */
1005       if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
1006          src[0] = ac_to_float(&ctx->ac, src[0]);
1007 
1008          if (LLVMTypeOf(src[0]) == ctx->ac.f64)
1009             src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
1010 
1011          /* Fast path conversion. This only works if NIR is vectorized
1012           * to vec2 16.
1013           */
1014          if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
1015             LLVMValueRef args[] = {
1016                ac_llvm_extract_elem(&ctx->ac, src[0], 0),
1017                ac_llvm_extract_elem(&ctx->ac, src[0], 1),
1018             };
1019             result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
1020             break;
1021          }
1022 
1023          assert(ac_get_llvm_num_components(src[0]) == 1);
1024          LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->ac.f32)};
1025          result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
1026          result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
1027       } else {
1028          if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
1029             result =
1030                LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1031          else
1032             result =
1033                LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1034       }
1035       break;
1036    case nir_op_f2f16_rtne:
1037    case nir_op_f2f32:
1038    case nir_op_f2f64:
1039       src[0] = ac_to_float(&ctx->ac, src[0]);
1040       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
1041          result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1042       else
1043          result =
1044             LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1045       break;
1046    case nir_op_u2u8:
1047    case nir_op_u2u16:
1048    case nir_op_u2u32:
1049    case nir_op_u2u64:
1050       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
1051          result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
1052       else
1053          result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1054       break;
1055    case nir_op_i2i8:
1056    case nir_op_i2i16:
1057    case nir_op_i2imp:
1058    case nir_op_i2i32:
1059    case nir_op_i2i64:
1060       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
1061          result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
1062       else
1063          result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1064       break;
1065    case nir_op_bcsel:
1066       result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
1067       break;
1068    case nir_op_find_lsb:
1069       result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
1070       break;
1071    case nir_op_ufind_msb:
1072       result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
1073       break;
1074    case nir_op_ifind_msb:
1075       result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
1076       break;
1077    case nir_op_uadd_carry:
1078       result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1079       break;
1080    case nir_op_usub_borrow:
1081       result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
1082       break;
1083    case nir_op_b2f16:
1084    case nir_op_b2f32:
1085    case nir_op_b2f64:
1086       result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
1087       break;
1088    case nir_op_f2b1:
1089       result = emit_f2b(&ctx->ac, src[0]);
1090       break;
1091    case nir_op_b2i8:
1092    case nir_op_b2i16:
1093    case nir_op_b2i32:
1094    case nir_op_b2i64:
1095       result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
1096       break;
1097    case nir_op_i2b1:
1098    case nir_op_b2b1: /* after loads */
1099       result = emit_i2b(&ctx->ac, src[0]);
1100       break;
1101    case nir_op_b2b16: /* before stores */
1102       result = LLVMBuildZExt(ctx->ac.builder, src[0], ctx->ac.i16, "");
1103       break;
1104    case nir_op_b2b32: /* before stores */
1105       result = LLVMBuildZExt(ctx->ac.builder, src[0], ctx->ac.i32, "");
1106       break;
1107    case nir_op_fquantize2f16:
1108       result = emit_f2f16(&ctx->ac, src[0]);
1109       break;
1110    case nir_op_umul_high:
1111       result = emit_umul_high(&ctx->ac, src[0], src[1]);
1112       break;
1113    case nir_op_imul_high:
1114       result = emit_imul_high(&ctx->ac, src[0], src[1]);
1115       break;
1116    case nir_op_pack_half_2x16:
1117       result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
1118       break;
1119    case nir_op_pack_half_2x16_split:
1120       src[0] = ac_to_float(&ctx->ac, src[0]);
1121       src[1] = ac_to_float(&ctx->ac, src[1]);
1122       result = LLVMBuildBitCast(ctx->ac.builder,
1123                                 ac_build_cvt_pkrtz_f16(&ctx->ac, src),
1124                                 ctx->ac.i32, "");
1125       break;
1126    case nir_op_pack_snorm_2x16:
1127       result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
1128       break;
1129    case nir_op_pack_unorm_2x16:
1130       result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
1131       break;
1132    case nir_op_pack_uint_2x16: {
1133       LLVMValueRef comp[2];
1134 
1135       comp[0] = LLVMBuildExtractElement(ctx->ac.builder, src[0], ctx->ac.i32_0, "");
1136       comp[1] = LLVMBuildExtractElement(ctx->ac.builder, src[0], ctx->ac.i32_1, "");
1137 
1138       result = ac_build_cvt_pk_u16(&ctx->ac, comp, 16, false);
1139       break;
1140    }
1141    case nir_op_pack_sint_2x16: {
1142       LLVMValueRef comp[2];
1143 
1144       comp[0] = LLVMBuildExtractElement(ctx->ac.builder, src[0], ctx->ac.i32_0, "");
1145       comp[1] = LLVMBuildExtractElement(ctx->ac.builder, src[0], ctx->ac.i32_1, "");
1146 
1147       result = ac_build_cvt_pk_i16(&ctx->ac, comp, 16, false);
1148       break;
1149    }
1150    case nir_op_unpack_half_2x16:
1151       result = emit_unpack_half_2x16(&ctx->ac, src[0]);
1152       break;
1153    case nir_op_unpack_half_2x16_split_x: {
1154       assert(ac_get_llvm_num_components(src[0]) == 1);
1155       LLVMValueRef tmp = emit_unpack_half_2x16(&ctx->ac, src[0]);
1156       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
1157       break;
1158    }
1159    case nir_op_unpack_half_2x16_split_y: {
1160       assert(ac_get_llvm_num_components(src[0]) == 1);
1161       LLVMValueRef tmp = emit_unpack_half_2x16(&ctx->ac, src[0]);
1162       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
1163       break;
1164    }
1165    case nir_op_fddx:
1166    case nir_op_fddy:
1167    case nir_op_fddx_fine:
1168    case nir_op_fddy_fine:
1169    case nir_op_fddx_coarse:
1170    case nir_op_fddy_coarse:
1171       result = emit_ddxy(ctx, instr->op, src[0]);
1172       break;
1173 
1174    case nir_op_unpack_64_4x16: {
1175       result = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v4i16, "");
1176       break;
1177    }
1178    case nir_op_pack_64_4x16: {
1179       result = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.i64, "");
1180       break;
1181    }
1182 
1183    case nir_op_unpack_64_2x32: {
1184       result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1185             ctx->ac.v2i32, "");
1186       break;
1187    }
1188    case nir_op_unpack_64_2x32_split_x: {
1189       assert(ac_get_llvm_num_components(src[0]) == 1);
1190       LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
1191       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
1192       break;
1193    }
1194    case nir_op_unpack_64_2x32_split_y: {
1195       assert(ac_get_llvm_num_components(src[0]) == 1);
1196       LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
1197       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
1198       break;
1199    }
1200 
1201    case nir_op_pack_64_2x32: {
1202       result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1203             ctx->ac.i64, "");
1204       break;
1205    }
1206    case nir_op_pack_64_2x32_split: {
1207       LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
1208       result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
1209       break;
1210    }
1211 
1212    case nir_op_pack_32_4x8:
1213    case nir_op_pack_32_2x16: {
1214       result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1215             ctx->ac.i32, "");
1216       break;
1217    }
1218    case nir_op_pack_32_2x16_split: {
1219       LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
1220       result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
1221       break;
1222    }
1223 
1224    case nir_op_unpack_32_2x16: {
1225       result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1226             ctx->ac.v2i16, "");
1227       break;
1228    }
1229    case nir_op_unpack_32_2x16_split_x: {
1230       LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
1231       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
1232       break;
1233    }
1234    case nir_op_unpack_32_2x16_split_y: {
1235       LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
1236       result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
1237       break;
1238    }
1239 
1240    case nir_op_cube_face_coord_amd: {
1241       src[0] = ac_to_float(&ctx->ac, src[0]);
1242       LLVMValueRef results[2];
1243       LLVMValueRef in[3];
1244       for (unsigned chan = 0; chan < 3; chan++)
1245          in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
1246       results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3,
1247                                       AC_FUNC_ATTR_READNONE);
1248       results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3,
1249                                       AC_FUNC_ATTR_READNONE);
1250       LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3,
1251                                            AC_FUNC_ATTR_READNONE);
1252       results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
1253       results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
1254       LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
1255       results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
1256       results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
1257       result = ac_build_gather_values(&ctx->ac, results, 2);
1258       break;
1259    }
1260 
1261    case nir_op_cube_face_index_amd: {
1262       src[0] = ac_to_float(&ctx->ac, src[0]);
1263       LLVMValueRef in[3];
1264       for (unsigned chan = 0; chan < 3; chan++)
1265          in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
1266       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", ctx->ac.f32, in, 3,
1267                                   AC_FUNC_ATTR_READNONE);
1268       break;
1269    }
1270 
1271    case nir_op_extract_u8:
1272    case nir_op_extract_i8:
1273    case nir_op_extract_u16:
1274    case nir_op_extract_i16: {
1275       bool is_signed = instr->op == nir_op_extract_i16 || instr->op == nir_op_extract_i8;
1276       unsigned size = instr->op == nir_op_extract_u8 || instr->op == nir_op_extract_i8 ? 8 : 16;
1277       LLVMValueRef offset = LLVMConstInt(LLVMTypeOf(src[0]), nir_src_as_uint(instr->src[1].src) * size, false);
1278       result = LLVMBuildLShr(ctx->ac.builder, src[0], offset, "");
1279       result = LLVMBuildTrunc(ctx->ac.builder, result, LLVMIntTypeInContext(ctx->ac.context, size), "");
1280       if (is_signed)
1281          result = LLVMBuildSExt(ctx->ac.builder, result, LLVMTypeOf(src[0]), "");
1282       else
1283          result = LLVMBuildZExt(ctx->ac.builder, result, LLVMTypeOf(src[0]), "");
1284       break;
1285    }
1286 
1287    case nir_op_insert_u8:
1288    case nir_op_insert_u16: {
1289       unsigned size = instr->op == nir_op_insert_u8 ? 8 : 16;
1290       LLVMValueRef offset = LLVMConstInt(LLVMTypeOf(src[0]), nir_src_as_uint(instr->src[1].src) * size, false);
1291       LLVMValueRef mask = LLVMConstInt(LLVMTypeOf(src[0]), u_bit_consecutive(0, size), false);
1292       result = LLVMBuildShl(ctx->ac.builder, LLVMBuildAnd(ctx->ac.builder, src[0], mask, ""), offset, "");
1293       break;
1294    }
1295 
1296    case nir_op_sdot_4x8_iadd:
1297    case nir_op_udot_4x8_uadd:
1298    case nir_op_sdot_4x8_iadd_sat:
1299    case nir_op_udot_4x8_uadd_sat: {
1300       const char *name = instr->op == nir_op_sdot_4x8_iadd ||
1301                          instr->op == nir_op_sdot_4x8_iadd_sat
1302                          ? "llvm.amdgcn.sdot4" : "llvm.amdgcn.udot4";
1303       src[3] = LLVMConstInt(ctx->ac.i1, instr->op == nir_op_sdot_4x8_iadd_sat ||
1304                                         instr->op == nir_op_udot_4x8_uadd_sat, false);
1305       result = ac_build_intrinsic(&ctx->ac, name, def_type, src, 4, AC_FUNC_ATTR_READNONE);
1306       break;
1307    }
1308 
1309    case nir_op_sdot_2x16_iadd:
1310    case nir_op_udot_2x16_uadd:
1311    case nir_op_sdot_2x16_iadd_sat:
1312    case nir_op_udot_2x16_uadd_sat: {
1313       const char *name = instr->op == nir_op_sdot_2x16_iadd ||
1314                          instr->op == nir_op_sdot_2x16_iadd_sat
1315                          ? "llvm.amdgcn.sdot2" : "llvm.amdgcn.udot2";
1316       src[0] = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
1317       src[1] = LLVMBuildBitCast(ctx->ac.builder, src[1], ctx->ac.v2i16, "");
1318       src[3] = LLVMConstInt(ctx->ac.i1, instr->op == nir_op_sdot_2x16_iadd_sat ||
1319                                         instr->op == nir_op_udot_2x16_uadd_sat, false);
1320       result = ac_build_intrinsic(&ctx->ac, name, def_type, src, 4, AC_FUNC_ATTR_READNONE);
1321       break;
1322    }
1323 
1324    case nir_op_sad_u8x4:
1325       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.sad.u8", ctx->ac.i32,
1326                                   (LLVMValueRef[]){src[0], src[1], src[2]}, 3,
1327                                   AC_FUNC_ATTR_READNONE);
1328       break;
1329 
1330    default:
1331       fprintf(stderr, "Unknown NIR alu instr: ");
1332       nir_print_instr(&instr->instr, stderr);
1333       fprintf(stderr, "\n");
1334       abort();
1335    }
1336 
1337    if (result) {
1338       assert(instr->dest.dest.is_ssa);
1339       result = ac_to_integer_or_pointer(&ctx->ac, result);
1340       ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
1341    }
1342 }
1343 
visit_load_const(struct ac_nir_context * ctx,const nir_load_const_instr * instr)1344 static void visit_load_const(struct ac_nir_context *ctx, const nir_load_const_instr *instr)
1345 {
1346    LLVMValueRef values[4], value = NULL;
1347    LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
1348 
1349    for (unsigned i = 0; i < instr->def.num_components; ++i) {
1350       switch (instr->def.bit_size) {
1351       case 1:
1352          values[i] = LLVMConstInt(element_type, instr->value[i].b, false);
1353          break;
1354       case 8:
1355          values[i] = LLVMConstInt(element_type, instr->value[i].u8, false);
1356          break;
1357       case 16:
1358          values[i] = LLVMConstInt(element_type, instr->value[i].u16, false);
1359          break;
1360       case 32:
1361          values[i] = LLVMConstInt(element_type, instr->value[i].u32, false);
1362          break;
1363       case 64:
1364          values[i] = LLVMConstInt(element_type, instr->value[i].u64, false);
1365          break;
1366       default:
1367          fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size);
1368          abort();
1369       }
1370    }
1371    if (instr->def.num_components > 1) {
1372       value = LLVMConstVector(values, instr->def.num_components);
1373    } else
1374       value = values[0];
1375 
1376    ctx->ssa_defs[instr->def.index] = value;
1377 }
1378 
/* Read the buffer size (NUM_RECORDS, dword 2) out of a buffer descriptor.
 *
 * When in_elements is set and we are on GFX8, convert the byte size to an
 * element count by dividing by the descriptor's stride, since TXQ is
 * expected to report elements while the GFX8 descriptor stores bytes.
 */
static LLVMValueRef get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor,
                                    bool in_elements)
{
   LLVMBuilderRef builder = ctx->ac.builder;
   LLVMValueRef num_records =
      LLVMBuildExtractElement(builder, descriptor, LLVMConstInt(ctx->ac.i32, 2, false), "");

   /* Only GFX8 needs the bytes -> elements fixup. */
   if (ctx->ac.chip_class != GFX8 || !in_elements)
      return num_records;

   /* Stride lives in bits [29:16] of dword 1 and is always non-zero
    * for resources queried with TXQ.
    */
   LLVMValueRef word1 = LLVMBuildExtractElement(builder, descriptor, ctx->ac.i32_1, "");
   LLVMValueRef stride =
      LLVMBuildLShr(builder, word1, LLVMConstInt(ctx->ac.i32, 16, false), "");
   stride = LLVMBuildAnd(builder, stride, LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");

   return LLVMBuildUDiv(builder, num_records, stride, "");
}
1399 
1400 /* Gather4 should follow the same rules as bilinear filtering, but the hardware
1401  * incorrectly forces nearest filtering if the texture format is integer.
1402  * The only effect it has on Gather4, which always returns 4 texels for
1403  * bilinear filtering, is that the final coordinates are off by 0.5 of
1404  * the texel size.
1405  *
1406  * The workaround is to subtract 0.5 from the unnormalized coordinates,
1407  * or (0.5 / size) from the normalized coordinates.
1408  *
1409  * However, cube textures with 8_8_8_8 data formats require a different
1410  * workaround of overriding the num format to USCALED/SSCALED. This would lose
1411  * precision in 32-bit data formats, so it needs to be applied dynamically at
1412  * runtime. In this case, return an i1 value that indicates whether the
1413  * descriptor was overridden (and hence a fixup of the sampler result is needed).
1414  */
static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, struct ac_image_args *args,
                                          const nir_tex_instr *instr)
{
   nir_alu_type stype = nir_alu_type_get_base_type(instr->dest_type);
   LLVMValueRef wa_8888 = NULL; /* i1: true when the 8_8_8_8 descriptor override applies */
   LLVMValueRef half_texel[2];  /* offset added to the x/y coordinates */
   LLVMValueRef result;

   assert(stype == nir_type_int || stype == nir_type_uint);

   if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      LLVMValueRef formats;
      LLVMValueRef data_format;
      LLVMValueRef wa_formats;

      /* Dword 1 of the image descriptor holds the format fields. */
      formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");

      /* Extract the 6-bit DATA_FORMAT field (bits 25:20). */
      data_format = LLVMBuildLShr(ctx->builder, formats, LLVMConstInt(ctx->i32, 20, false), "");
      data_format =
         LLVMBuildAnd(ctx->builder, data_format, LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
      wa_8888 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, data_format,
                              LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");

      /* For 8_8_8_8, override NUM_FORMAT to USCALED/SSCALED (matching the
       * result's signedness); the sampled values are converted back to
       * integers at the end of this function.
       */
      uint32_t wa_num_format = stype == nir_type_uint
                                  ? S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED)
                                  : S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
      wa_formats = LLVMBuildAnd(ctx->builder, formats,
                                LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), "");
      wa_formats =
         LLVMBuildOr(ctx->builder, wa_formats, LLVMConstInt(ctx->i32, wa_num_format, false), "");

      /* Patch the (possibly overridden) format dword back into the descriptor. */
      formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, "");
      args->resource =
         LLVMBuildInsertElement(ctx->builder, args->resource, formats, ctx->i32_1, "");
   }

   if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
      assert(!wa_8888);
      /* Rect textures always use unnormalized coordinates: offset by -0.5 texels. */
      half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
   } else {
      struct ac_image_args resinfo = {0};
      LLVMBasicBlockRef bbs[2];

      LLVMValueRef unnorm = NULL;
      LLVMValueRef default_offset = ctx->f32_0;
      if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D && !instr->is_array) {
         /* In vulkan, whether the sampler uses unnormalized
          * coordinates or not is a dynamic property of the
          * sampler. Hence, to figure out whether or not we
          * need to divide by the texture size, we need to test
          * the sampler at runtime. This tests the bit set by
          * radv_init_sampler().
          */
         LLVMValueRef sampler0 =
            LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, "");
         sampler0 = LLVMBuildLShr(ctx->builder, sampler0, LLVMConstInt(ctx->i32, 15, false), "");
         sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, "");
         unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, "");
         default_offset = LLVMConstReal(ctx->f32, -0.5);
      }

      bbs[0] = LLVMGetInsertBlock(ctx->builder);
      if (wa_8888 || unnorm) {
         assert(!(wa_8888 && unnorm));
         LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm;
         /* Skip the texture size query entirely if we don't need it. */
         ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000);
         bbs[1] = LLVMGetInsertBlock(ctx->builder);
      }

      /* Query the texture size. */
      resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array);
      resinfo.opcode = ac_image_get_resinfo;
      resinfo.dmask = 0xf;
      resinfo.lod = ctx->i32_0;
      resinfo.resource = args->resource;
      resinfo.attributes = AC_FUNC_ATTR_READNONE;
      LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo);

      /* Compute -0.5 / size. */
      for (unsigned c = 0; c < 2; c++) {
         half_texel[c] =
            LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, 0), "");
         half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
         half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
         half_texel[c] =
            LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), "");
      }

      if (wa_8888 || unnorm) {
         ac_build_endif(ctx, 2000);

         /* Merge the conditionally-computed offset with the default. */
         for (unsigned c = 0; c < 2; c++) {
            LLVMValueRef values[2] = {default_offset, half_texel[c]};
            half_texel[c] = ac_build_phi(ctx, ctx->f32, 2, values, bbs);
         }
      }
   }

   /* Apply the half-texel offset to the x/y coordinates. */
   for (unsigned c = 0; c < 2; c++) {
      LLVMValueRef tmp;
      tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
      args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
   }

   args->attributes = AC_FUNC_ATTR_READNONE;
   result = ac_build_image_opcode(ctx, args);

   if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      LLVMValueRef tmp, tmp2;

      /* if the cube workaround is in place, f2i the result. */
      for (unsigned c = 0; c < 4; c++) {
         tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
         if (stype == nir_type_uint)
            tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
         else
            tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
         tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
         tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
         /* Use the converted value only where the descriptor was overridden. */
         tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, "");
         tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
         result =
            LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
      }
   }
   return result;
}
1543 
/* Select the AMD image opcode for a NIR texture instruction and emit it.
 * 'args' arrives pre-populated by the caller (coordinates, descriptors,
 * lod/bias, d16/a16/tfe flags); buffer samplers take a separate
 * buffer-load-format path. Returns the LLVM value of the result.
 */
static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_tex_instr *instr,
                                        struct ac_image_args *args)
{
   assert((!args->tfe || !args->d16) && "unsupported");

   if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
      /* Only fetch the components the shader actually reads. */
      unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);

      assert(instr->dest.is_ssa);

      /* Buffers don't support A16. */
      if (args->a16)
         args->coords[0] = LLVMBuildZExt(ctx->ac.builder, args->coords[0], ctx->ac.i32, "");

      return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0,
                                         util_last_bit(mask), 0, true,
                                         instr->dest.ssa.bit_size == 16,
                                         args->tfe);
   }

   args->opcode = ac_image_sample;

   switch (instr->op) {
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_samples_identical:
      /* Texel fetches become image loads; only non-MSAA fetches with an
       * explicit LOD use the _mip form. */
      args->opcode = args->level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS
                        ? ac_image_load
                        : ac_image_load_mip;
      args->level_zero = false;
      break;
   case nir_texop_txs:
   case nir_texop_query_levels:
      args->opcode = ac_image_get_resinfo;
      if (!args->lod)
         args->lod = ctx->ac.i32_0;
      args->level_zero = false;
      break;
   case nir_texop_tex:
      /* Implicit derivatives only exist in fragment shaders and compute
       * shaders with a derivative group; elsewhere force LOD 0. */
      if (ctx->stage != MESA_SHADER_FRAGMENT &&
          (ctx->stage != MESA_SHADER_COMPUTE ||
           ctx->info->cs.derivative_group == DERIVATIVE_GROUP_NONE)) {
         assert(!args->lod);
         args->level_zero = true;
      }
      break;
   case nir_texop_tg4:
      args->opcode = ac_image_gather4;
      if (!args->lod && !args->bias)
         args->level_zero = true;
      break;
   case nir_texop_lod:
      args->opcode = ac_image_get_lod;
      break;
   case nir_texop_fragment_fetch_amd:
   case nir_texop_fragment_mask_fetch_amd:
      args->opcode = ac_image_load;
      args->level_zero = false;
      break;
   default:
      break;
   }

   /* Aldebaran doesn't have image_sample_lz, but image_sample behaves like lz. */
   if (!ctx->ac.info->has_3d_cube_border_color_mipmap)
      args->level_zero = false;

   /* GFX8 and older need the gather4-on-integer-formats workaround. */
   if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8 &&
       (instr->dest_type & (nir_type_int | nir_type_uint))) {
      return lower_gather4_integer(&ctx->ac, args, instr);
   }

   /* Fixup for GFX9 which allocates 1D textures as 2D. */
   if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) {
      if ((args->dim == ac_image_2darray || args->dim == ac_image_2d) && !args->coords[1]) {
         args->coords[1] = ctx->ac.i32_0;
      }
   }

   args->attributes = AC_FUNC_ATTR_READNONE;
   bool cs_derivs =
      ctx->stage == MESA_SHADER_COMPUTE && ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
   if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) {
      /* Prevent texture instructions with implicit derivatives from being
       * sinked into branches. */
      switch (instr->op) {
      case nir_texop_tex:
      case nir_texop_txb:
      case nir_texop_lod:
         args->attributes |= AC_FUNC_ATTR_CONVERGENT;
         break;
      default:
         break;
      }
   }

   return ac_build_image_opcode(&ctx->ac, args);
}
1642 
/* Lower nir_intrinsic_load_push_constant. 32-bit loads at constant offsets
 * that fall inside the inlined range are served straight from user SGPR
 * arguments; everything else loads from the push-constant buffer, with extra
 * shuffling for 8-bit and 16-bit destinations since the buffer is accessed
 * in dword-sized pieces.
 */
static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
   LLVMValueRef ptr, addr;
   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
   unsigned index = nir_intrinsic_base(instr);

   /* Byte address = constant base + dynamic offset. */
   addr = LLVMConstInt(ctx->ac.i32, index, 0);
   addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");

   /* Load constant values from user SGPRS when possible, otherwise
    * fallback to the default path that loads directly from memory.
    */
   if (LLVMIsConstant(src0) && instr->dest.ssa.bit_size == 32) {
      unsigned count = instr->dest.ssa.num_components;
      unsigned offset = index;

      offset += LLVMConstIntGetZExtValue(src0);
      offset /= 4; /* bytes -> dword index */

      /* Make the index relative to the first inlined push constant. If the
       * access lies below that base, this unsigned subtraction wraps to a
       * huge value, the range check below fails, and we take the memory path.
       */
      offset -= ctx->args->base_inline_push_consts;

      unsigned num_inline_push_consts = 0;
      for (unsigned i = 0; i < ARRAY_SIZE(ctx->args->inline_push_consts); i++) {
         if (ctx->args->inline_push_consts[i].used)
            num_inline_push_consts++;
      }

      if (offset + count <= num_inline_push_consts) {
         LLVMValueRef *const push_constants = alloca(num_inline_push_consts * sizeof(LLVMValueRef));
         for (unsigned i = 0; i < num_inline_push_consts; i++)
            push_constants[i] = ac_get_arg(&ctx->ac, ctx->args->inline_push_consts[i]);
         return ac_build_gather_values(&ctx->ac, push_constants + offset, count);
      }
   }

   ptr =
      LLVMBuildGEP(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->push_constants), &addr, 1, "");

   if (instr->dest.ssa.bit_size == 8) {
      /* Load 1 or 2 dwords' worth of bytes, then use v_alignbyte to shift
       * the addressed bytes into the low lanes (the byte shift comes from
       * the low bits of addr). */
      unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
      LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
      ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
      LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");

      /* alignbyte operands: { high dword, low dword, byte shift }. */
      LLVMValueRef params[3];
      if (load_dwords > 1) {
         LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
         params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
                                             LLVMConstInt(ctx->ac.i32, 1, false), "");
         params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
                                             LLVMConstInt(ctx->ac.i32, 0, false), "");
      } else {
         res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
         params[0] = ctx->ac.i32_0;
         params[1] = res;
      }
      params[2] = addr;
      res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);

      /* Keep only the requested number of bytes. */
      res = LLVMBuildTrunc(
         ctx->ac.builder, res,
         LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
      if (instr->dest.ssa.num_components > 1)
         res = LLVMBuildBitCast(ctx->ac.builder, res,
                                LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
      return res;
   } else if (instr->dest.ssa.bit_size == 16) {
      /* Load one extra dword, then pick between the aligned window and the
       * window shifted by one 16-bit element, based on bit 1 of the byte
       * address. */
      unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
      LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
      ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
      LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
      res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
      LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
      cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
      LLVMValueRef mask[] = {
         LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
         LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
         LLVMConstInt(ctx->ac.i32, 4, false)};
      LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
      LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
      LLVMValueRef shuffle_aligned =
         LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
      LLVMValueRef shuffle_unaligned =
         LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
      res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
      return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
   }

   /* 32/64-bit case: plain typed load from the push-constant buffer. */
   ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));

   return LLVMBuildLoad(ctx->ac.builder, ptr, "");
}
1735 
visit_get_ssbo_size(struct ac_nir_context * ctx,const nir_intrinsic_instr * instr)1736 static LLVMValueRef visit_get_ssbo_size(struct ac_nir_context *ctx,
1737                                         const nir_intrinsic_instr *instr)
1738 {
1739    bool non_uniform = nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM;
1740    LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, get_src(ctx, instr->src[0]), false, non_uniform);
1741    return get_buffer_size(ctx, rsrc, false);
1742 }
1743 
/* Return elements [start, start + count) of the LLVM vector 'src'
 * (count <= 4). Returns 'src' unchanged when the range covers the whole
 * vector, a scalar extract for count == 1, and a shuffle otherwise. */
static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
                                         unsigned start, unsigned count)
{
   LLVMValueRef idx[] = {ctx->i32_0, ctx->i32_1, LLVMConstInt(ctx->i32, 2, false),
                         LLVMConstInt(ctx->i32, 3, false)};

   unsigned total = ac_get_llvm_num_components(src);

   if (count == total) {
      assert(start == 0);
      return src;
   }

   if (count == 1) {
      assert(start < total);
      return LLVMBuildExtractElement(ctx->builder, src, idx[start], "");
   }

   assert(start + count <= total);
   assert(count <= 4);

   LLVMValueRef shuffle_mask = LLVMConstVector(&idx[start], count);
   return LLVMBuildShuffleVector(ctx->builder, src, src, shuffle_mask, "");
}
1765 
/* Translate NIR access qualifiers into AMD buffer cache-policy bits
 * (GLC/SLC) for a load or store. */
static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qualifier access,
                                 bool may_store_unaligned, bool writeonly_memory)
{
   unsigned policy = 0;

   /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores.  All
    * store opcodes not aligned to a dword are affected. The only way to
    * get unaligned stores is through shader images.
    */
   bool gfx6_unaligned_wa = may_store_unaligned && ctx->ac.chip_class == GFX6;

   /* If this is write-only, don't keep data in L1 to prevent evicting L1
    * cache lines that may be needed by other instructions. Coherent and
    * volatile accesses must also bypass L1.
    */
   bool bypass_l1 = writeonly_memory || (access & (ACCESS_COHERENT | ACCESS_VOLATILE)) != 0;

   if (gfx6_unaligned_wa || bypass_l1)
      policy |= ac_glc;

   if (access & ACCESS_STREAM_CACHE_POLICY)
      policy |= ac_glc | ac_slc;

   return policy;
}
1789 
enter_waterfall_ssbo(struct ac_nir_context * ctx,struct waterfall_context * wctx,const nir_intrinsic_instr * instr,nir_src src)1790 static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
1791                                          const nir_intrinsic_instr *instr, nir_src src)
1792 {
1793    return enter_waterfall(ctx, wctx, get_src(ctx, src),
1794                           nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
1795 }
1796 
/* Lower nir_intrinsic_store_ssbo to buffer store intrinsics, splitting the
 * write mask into contiguous runs the hardware can store directly (a byte,
 * a short, or 1-4 dwords). The mask-splitting rules below push deferred
 * components back into 'writemask' for a later loop iteration.
 */
static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
   /* If kills are postponed, predicate the stores on the loaded kill condition. */
   if (ctx->ac.postponed_kill) {
      LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
      ac_build_ifcc(&ctx->ac, cond, 7000);
   }

   LLVMValueRef src_data = get_src(ctx, instr->src[0]);
   int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
   unsigned writemask = nir_intrinsic_write_mask(instr);
   enum gl_access_qualifier access = nir_intrinsic_access(instr);
   bool writeonly_memory = access & ACCESS_NON_READABLE;
   unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);

   /* Waterfall over the buffer index (src[1]) if it is non-uniform. */
   struct waterfall_context wctx;
   LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);

   LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true, false);
   LLVMValueRef base_data = src_data;
   base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
   LLVMValueRef base_offset = get_src(ctx, instr->src[2]);

   while (writemask) {
      int start, count;
      LLVMValueRef data, offset;
      LLVMTypeRef data_type;

      u_bit_scan_consecutive_range(&writemask, &start, &count);

      /* Only dword-sized elements have a 3-component store; for other sizes
       * split off the third element and handle it in a later iteration. */
      if (count == 3 && elem_size_bytes != 4) {
         writemask |= 1 << (start + 2);
         count = 2;
      }
      int num_bytes = count * elem_size_bytes; /* count in bytes */

      /* we can only store 4 DWords at the same time.
       * can only happen for 64 Bit vectors. */
      if (num_bytes > 16) {
         writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
         count = 2;
         num_bytes = 16;
      }

      /* check alignment of 16 Bit stores */
      if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
         writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
         count = 1;
         num_bytes = 2;
      }

      /* Due to alignment issues, split stores of 8-bit/16-bit
       * vectors.
       */
      if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
         writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
         count = 1;
         num_bytes = elem_size_bytes;
      }

      data = extract_vector_range(&ctx->ac, base_data, start, count);

      offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
                            LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");

      if (num_bytes == 1) {
         ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
      } else if (num_bytes == 2) {
         ac_build_tbuffer_store_short(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
      } else {
         /* 4-16 byte stores go out as float vectors. */
         switch (num_bytes) {
         case 16: /* v4f32 */
            data_type = ctx->ac.v4f32;
            break;
         case 12: /* v3f32 */
            data_type = ctx->ac.v3f32;
            break;
         case 8: /* v2f32 */
            data_type = ctx->ac.v2f32;
            break;
         case 4: /* f32 */
            data_type = ctx->ac.f32;
            break;
         default:
            unreachable("Malformed vector store.");
         }
         data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");

         ac_build_buffer_store_dword(&ctx->ac, rsrc, data, NULL, offset,
                                     ctx->ac.i32_0, 0, cache_policy);
      }
   }

   exit_waterfall(ctx, &wctx, NULL);

   if (ctx->ac.postponed_kill)
      ac_build_endif(&ctx->ac, 7000);
}
1894 
/* Emit a 64-bit compare-and-swap on a buffer by reconstructing a flat
 * 64-bit address from the descriptor's base-address dwords and issuing a
 * global cmpxchg. With robust buffer access (or for images), the atomic is
 * predicated on offset < size (descriptor dword 2) and out-of-bounds lanes
 * produce 0 via the phi at the end.
 */
static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx, LLVMValueRef descriptor,
                                           LLVMValueRef offset, LLVMValueRef compare,
                                           LLVMValueRef exchange, bool image)
{
   LLVMBasicBlockRef start_block = NULL, then_block = NULL;
   if (ctx->abi->robust_buffer_access || image) {
      /* Bounds check: only run the atomic when offset < size. */
      LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);

      LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
      start_block = LLVMGetInsertBlock(ctx->ac.builder);

      ac_build_ifcc(&ctx->ac, cond, -1);

      then_block = LLVMGetInsertBlock(ctx->ac.builder);
   }

   /* For images the incoming offset counts 64-bit elements; scale to bytes. */
   if (image)
      offset = LLVMBuildMul(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, 8, false), "");

   /* Base address: dword 0 plus the low 16 bits of dword 1 (48-bit address). */
   LLVMValueRef ptr_parts[2] = {
      ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
      LLVMBuildAnd(ctx->ac.builder, ac_llvm_extract_elem(&ctx->ac, descriptor, 1),
                   LLVMConstInt(ctx->ac.i32, 65535, 0), "")};

   /* Sign-extend bit 47 to form a canonical 64-bit address. */
   ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, "");
   ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, "");

   offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, "");

   LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2);
   ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
   ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, "");
   ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL),
                           "");

   /* cmpxchg yields a {value, success} pair; keep the loaded value. */
   LLVMValueRef result =
      ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
   result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");

   if (ctx->abi->robust_buffer_access || image) {
      ac_build_endif(&ctx->ac, -1);

      /* Merge: 0 from the skipped (out-of-bounds) path, else the result. */
      LLVMBasicBlockRef incoming_blocks[2] = {
         start_block,
         then_block,
      };

      LLVMValueRef incoming_values[2] = {
         LLVMConstInt(ctx->ac.i64, 0, 0),
         result,
      };
      LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
      LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
      return ret;
   } else {
      return result;
   }
}
1953 
/* Lower nir_intrinsic_ssbo_atomic_* to llvm.amdgcn.raw.buffer.atomic.<op>
 * intrinsics. 64-bit comp_swap is special-cased through a global cmpxchg
 * (emit_ssbo_comp_swap_64). Returns the value produced by the atomic.
 */
static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
   /* If kills are postponed, predicate the atomic on the loaded kill condition. */
   if (ctx->ac.postponed_kill) {
      LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
      ac_build_ifcc(&ctx->ac, cond, 7001);
   }

   LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));
   const char *op;
   char name[64], type[8];
   LLVMValueRef params[6], descriptor;
   LLVMValueRef result;
   int arg_count = 0;

   /* Waterfall over the buffer index (src[0]) if it is non-uniform. */
   struct waterfall_context wctx;
   LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);

   /* Map the NIR intrinsic to the intrinsic-name suffix. */
   switch (instr->intrinsic) {
   case nir_intrinsic_ssbo_atomic_add:
      op = "add";
      break;
   case nir_intrinsic_ssbo_atomic_imin:
      op = "smin";
      break;
   case nir_intrinsic_ssbo_atomic_umin:
      op = "umin";
      break;
   case nir_intrinsic_ssbo_atomic_imax:
      op = "smax";
      break;
   case nir_intrinsic_ssbo_atomic_umax:
      op = "umax";
      break;
   case nir_intrinsic_ssbo_atomic_and:
      op = "and";
      break;
   case nir_intrinsic_ssbo_atomic_or:
      op = "or";
      break;
   case nir_intrinsic_ssbo_atomic_xor:
      op = "xor";
      break;
   case nir_intrinsic_ssbo_atomic_exchange:
      op = "swap";
      break;
   case nir_intrinsic_ssbo_atomic_comp_swap:
      op = "cmpswap";
      break;
   case nir_intrinsic_ssbo_atomic_fmin:
      op = "fmin";
      break;
   case nir_intrinsic_ssbo_atomic_fmax:
      op = "fmax";
      break;
   default:
      abort();
   }

   descriptor = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true, false);

   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap && return_type == ctx->ac.i64) {
      /* No 64-bit buffer cmpswap path here; use a global cmpxchg instead. */
      result = emit_ssbo_comp_swap_64(ctx, descriptor, get_src(ctx, instr->src[1]),
                                      get_src(ctx, instr->src[2]), get_src(ctx, instr->src[3]), false);
   } else {
      LLVMValueRef data = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);

      /* cmpswap takes the comparison value as the first operand. */
      if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
         params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
      }
      /* Float min/max operate on float-typed operands. */
      if (instr->intrinsic == nir_intrinsic_ssbo_atomic_fmin ||
          instr->intrinsic == nir_intrinsic_ssbo_atomic_fmax) {
         data = ac_to_float(&ctx->ac, data);
         return_type = LLVMTypeOf(data);
      }
      params[arg_count++] = data;
      params[arg_count++] = descriptor;
      params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
      params[arg_count++] = ctx->ac.i32_0;               /* soffset */
      params[arg_count++] = ctx->ac.i32_0;               /* slc */

      ac_build_type_name_for_intr(return_type, type, sizeof(type));
      snprintf(name, sizeof(name), "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);

      result = ac_build_intrinsic(&ctx->ac, name, return_type, params, arg_count, 0);

      /* Convert the float result back to the integer representation. */
      if (instr->intrinsic == nir_intrinsic_ssbo_atomic_fmin ||
          instr->intrinsic == nir_intrinsic_ssbo_atomic_fmax) {
         result = ac_to_integer(&ctx->ac, result);
      }
   }

   result = exit_waterfall(ctx, &wctx, result);
   if (ctx->ac.postponed_kill)
      ac_build_endif(&ctx->ac, 7001);
   return result;
}
2050 
/* Lower nir_intrinsic_load_ssbo. The load is split into chunks of at most
 * 16 bytes; sub-dword elements that are not dword-aligned are loaded one at
 * a time with byte/short tbuffer loads. Each chunk is bitcast through a
 * byte vector back to the destination element type, and the pieces are
 * gathered into the final result.
 */
static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
   /* Waterfall over the buffer index (src[0]) if it is non-uniform. */
   struct waterfall_context wctx;
   LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);

   int elem_size_bytes = instr->dest.ssa.bit_size / 8;
   int num_components = instr->num_components;
   enum gl_access_qualifier access = nir_intrinsic_access(instr);
   unsigned cache_policy = get_cache_policy(ctx, access, false, false);

   LLVMValueRef offset = get_src(ctx, instr->src[1]);
   LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false, false);
   LLVMValueRef vindex = ctx->ac.i32_0;

   LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
   LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;

   LLVMValueRef results[4];
   for (int i = 0; i < num_components;) {
      /* Choose the chunk size for this iteration. */
      int num_elems = num_components - i;
      if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
         num_elems = 1; /* unaligned sub-dword: load one element at a time */
      if (num_elems * elem_size_bytes > 16)
         num_elems = 16 / elem_size_bytes; /* cap at 16 bytes per load */
      int load_bytes = num_elems * elem_size_bytes;

      LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);

      LLVMValueRef ret;

      if (load_bytes == 1) {
         ret = ac_build_tbuffer_load_byte(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset,
                                          cache_policy);
      } else if (load_bytes == 2) {
         ret = ac_build_tbuffer_load_short(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset,
                                           cache_policy);
      } else {
         /* Dword-granular load; round up to a power-of-two channel count. */
         int num_channels = util_next_power_of_two(load_bytes) / 4;
         bool can_speculate = access & ACCESS_CAN_REORDER;

         ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, vindex, offset, immoffset, 0,
                                    ctx->ac.f32, cache_policy, can_speculate, false);
      }

      /* Trim any over-read bytes and reinterpret as destination elements. */
      LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
      ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
      ret = ac_trim_vector(&ctx->ac, ret, load_bytes);

      LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
      ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");

      for (unsigned j = 0; j < num_elems; j++) {
         results[i + j] =
            LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
      }
      i += num_elems;
   }

   LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components);
   return exit_waterfall(ctx, &wctx, ret);
}
2112 
enter_waterfall_ubo(struct ac_nir_context * ctx,struct waterfall_context * wctx,const nir_intrinsic_instr * instr)2113 static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
2114                                         const nir_intrinsic_instr *instr)
2115 {
2116    return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]),
2117                           nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
2118 }
2119 
visit_load_global(struct ac_nir_context * ctx,nir_intrinsic_instr * instr)2120 static LLVMValueRef visit_load_global(struct ac_nir_context *ctx,
2121                                       nir_intrinsic_instr *instr)
2122 {
2123    LLVMValueRef addr = get_src(ctx, instr->src[0]);
2124    LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
2125    LLVMValueRef val;
2126 
2127    LLVMTypeRef ptr_type = LLVMPointerType(result_type, AC_ADDR_SPACE_GLOBAL);
2128 
2129    addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");
2130 
2131    val = LLVMBuildLoad(ctx->ac.builder, addr, "");
2132 
2133    if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) {
2134       LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
2135       LLVMSetAlignment(val, ac_get_type_size(result_type));
2136    }
2137 
2138    return val;
2139 }
2140 
/* Emit a store to a raw global address (nir_intrinsic_store_global). */
static void visit_store_global(struct ac_nir_context *ctx,
                               nir_intrinsic_instr *instr)
{
   /* Suppress the side effect for lanes whose kill has been postponed. */
   if (ctx->ac.postponed_kill) {
      LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
      ac_build_ifcc(&ctx->ac, cond, 7002);
   }

   LLVMValueRef data = get_src(ctx, instr->src[0]);
   LLVMValueRef raw_addr = get_src(ctx, instr->src[1]);
   LLVMTypeRef data_type = LLVMTypeOf(data);

   LLVMValueRef ptr = LLVMBuildIntToPtr(
      ctx->ac.builder, raw_addr, LLVMPointerType(data_type, AC_ADDR_SPACE_GLOBAL), "");
   LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, data, ptr);

   /* Coherent/volatile stores are emitted as monotonic atomics so they are
    * not cached/reordered; atomic stores need an explicit alignment.
    */
   if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) {
      LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
      LLVMSetAlignment(store, ac_get_type_size(data_type));
   }

   if (ctx->ac.postponed_kill)
      ac_build_endif(&ctx->ac, 7002);
}
2168 
/* Emit an atomic on a raw global address (nir_intrinsic_global_atomic_*).
 * Returns the value the memory location held before the operation.
 */
static LLVMValueRef visit_global_atomic(struct ac_nir_context *ctx,
                                        nir_intrinsic_instr *instr)
{
   /* Suppress the side effect for lanes whose kill has been postponed. */
   if (ctx->ac.postponed_kill) {
      LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
      ac_build_ifcc(&ctx->ac, cond, 7002);
   }

   LLVMValueRef addr = get_src(ctx, instr->src[0]);
   LLVMValueRef data = get_src(ctx, instr->src[1]);
   LLVMAtomicRMWBinOp op;
   LLVMValueRef result;

   /* use "singlethread" sync scope to implement relaxed ordering */
   const char *sync_scope = "singlethread-one-as";

   /* The amdgcn fmin/fmax intrinsics below take FP-typed operands. */
   if (instr->intrinsic == nir_intrinsic_global_atomic_fmin ||
       instr->intrinsic == nir_intrinsic_global_atomic_fmax) {
      data = ac_to_float(&ctx->ac, data);
   }

   LLVMTypeRef data_type = LLVMTypeOf(data);
   LLVMTypeRef ptr_type = LLVMPointerType(data_type, AC_ADDR_SPACE_GLOBAL);

   addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");

   if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap) {
      /* cmpxchg yields {old value, success flag}; only the old value is returned. */
      LLVMValueRef data1 = get_src(ctx, instr->src[2]);
      result = ac_build_atomic_cmp_xchg(&ctx->ac, addr, data, data1, sync_scope);
      result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
   } else if (instr->intrinsic == nir_intrinsic_global_atomic_fmin ||
              instr->intrinsic == nir_intrinsic_global_atomic_fmax) {
      /* Float min/max have no atomicrmw mapping here; call the target
       * intrinsic llvm.amdgcn.global.atomic.{fmin,fmax} directly.
       */
      const char *op = instr->intrinsic == nir_intrinsic_global_atomic_fmin ? "fmin" : "fmax";
      char name[64], type[8];
      LLVMValueRef params[2];
      int arg_count = 0;

      params[arg_count++] = addr;
      params[arg_count++] = data;

      ac_build_type_name_for_intr(data_type, type, sizeof(type));
      snprintf(name, sizeof(name), "llvm.amdgcn.global.atomic.%s.%s.p1%s.%s", op, type, type, type);

      result = ac_build_intrinsic(&ctx->ac, name, data_type, params, arg_count, 0);
      /* NIR atomics produce integer-typed results. */
      result = ac_to_integer(&ctx->ac, result);
   } else {
      /* Everything else maps 1:1 onto an LLVM atomicrmw opcode. */
      switch (instr->intrinsic) {
      case nir_intrinsic_global_atomic_add:
         op = LLVMAtomicRMWBinOpAdd;
         break;
      case nir_intrinsic_global_atomic_umin:
         op = LLVMAtomicRMWBinOpUMin;
         break;
      case nir_intrinsic_global_atomic_umax:
         op = LLVMAtomicRMWBinOpUMax;
         break;
      case nir_intrinsic_global_atomic_imin:
         op = LLVMAtomicRMWBinOpMin;
         break;
      case nir_intrinsic_global_atomic_imax:
         op = LLVMAtomicRMWBinOpMax;
         break;
      case nir_intrinsic_global_atomic_and:
         op = LLVMAtomicRMWBinOpAnd;
         break;
      case nir_intrinsic_global_atomic_or:
         op = LLVMAtomicRMWBinOpOr;
         break;
      case nir_intrinsic_global_atomic_xor:
         op = LLVMAtomicRMWBinOpXor;
         break;
      case nir_intrinsic_global_atomic_exchange:
         op = LLVMAtomicRMWBinOpXchg;
         break;
      default:
         unreachable("Invalid global atomic operation");
      }

      result = ac_build_atomic_rmw(&ctx->ac, op, addr, ac_to_integer(&ctx->ac, data), sync_scope);
   }

   if (ctx->ac.postponed_kill)
      ac_build_endif(&ctx->ac, 7002);

   return result;
}
2255 
visit_load_ubo_buffer(struct ac_nir_context * ctx,nir_intrinsic_instr * instr)2256 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
2257 {
2258    struct waterfall_context wctx;
2259    LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr);
2260 
2261    LLVMValueRef ret;
2262    LLVMValueRef rsrc = rsrc_base;
2263    LLVMValueRef offset = get_src(ctx, instr->src[1]);
2264    int num_components = instr->num_components;
2265 
2266    if (ctx->abi->load_ubo)
2267       rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
2268 
2269    /* Convert to a scalar 32-bit load. */
2270    if (instr->dest.ssa.bit_size == 64)
2271       num_components *= 2;
2272    else if (instr->dest.ssa.bit_size == 16)
2273       num_components = DIV_ROUND_UP(num_components, 2);
2274    else if (instr->dest.ssa.bit_size == 8)
2275       num_components = DIV_ROUND_UP(num_components, 4);
2276 
2277    ret =
2278       ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL, 0,
2279                            ctx->ac.f32, 0, true, true);
2280 
2281    /* Convert to the original type. */
2282    if (instr->dest.ssa.bit_size == 64) {
2283       ret = LLVMBuildBitCast(ctx->ac.builder, ret,
2284                              LLVMVectorType(ctx->ac.i64, num_components / 2), "");
2285    } else if (instr->dest.ssa.bit_size == 16) {
2286       ret = LLVMBuildBitCast(ctx->ac.builder, ret,
2287                              LLVMVectorType(ctx->ac.i16, num_components * 2), "");
2288    } else if (instr->dest.ssa.bit_size == 8) {
2289       ret = LLVMBuildBitCast(ctx->ac.builder, ret,
2290                              LLVMVectorType(ctx->ac.i8, num_components * 4), "");
2291    }
2292 
2293    ret = ac_trim_vector(&ctx->ac, ret, instr->num_components);
2294    ret = LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
2295 
2296    return exit_waterfall(ctx, &wctx, ret);
2297 }
2298 
/* Size in bytes of one scalar element of "type".
 * Booleans are stored as 32-bit values.
 */
static unsigned type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type));

   if (glsl_type_is_boolean(type))
      return 4;

   return glsl_get_bit_size(type) / 8;
}
2304 
/* Emit nir_intrinsic_store_output: either forward to the TCS output callback
 * or store the written channels into the per-output alloca slots.
 */
static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
   /* Suppress the side effect for lanes whose kill has been postponed. */
   if (ctx->ac.postponed_kill) {
      LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
      ac_build_ifcc(&ctx->ac, cond, 7002);
   }

   unsigned base = nir_intrinsic_base(instr);
   unsigned writemask = nir_intrinsic_write_mask(instr);
   unsigned component = nir_intrinsic_component(instr);
   LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
   nir_src offset = *nir_get_io_offset_src(instr);
   LLVMValueRef indir_index = NULL;

   /* A constant offset must be 0; otherwise it's an indirect index
    * (only supported on the TCS path below).
    */
   if (nir_src_is_const(offset))
      assert(nir_src_as_uint(offset) == 0);
   else
      indir_index = get_src(ctx, offset);

   switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
   case 16:
   case 32:
      break;
   case 64:
      unreachable("64-bit IO should have been lowered to 32 bits");
      return;
   default:
      unreachable("unhandled store_output bit size");
      return;
   }

   /* Align the writemask with the start component. */
   writemask <<= component;

   if (ctx->stage == MESA_SHADER_TESS_CTRL) {
      /* TCS outputs live in LDS/VRAM; let the ABI callback handle them. */
      nir_src *vertex_index_src = nir_get_io_arrayed_index_src(instr);
      LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
      unsigned location = nir_intrinsic_io_semantics(instr).location;

      ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index, src,
                                  writemask, component, location, base);
      return;
   }

   /* No indirect indexing is allowed after this point. */
   assert(!indir_index);

   /* Store each written channel into its output slot (4 slots per location). */
   for (unsigned chan = 0; chan < 8; chan++) {
      if (!(writemask & (1 << chan)))
         continue;

      LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
      LLVMValueRef output_addr = ctx->abi->outputs[base * 4 + chan];

      if (LLVMGetElementType(LLVMTypeOf(output_addr)) == ctx->ac.f32 &&
          LLVMTypeOf(value) == ctx->ac.f16) {
         LLVMValueRef output, index;

         /* Insert the 16-bit value into the low or high bits of the 32-bit output
          * using read-modify-write.
          */
         index = LLVMConstInt(ctx->ac.i32, nir_intrinsic_io_semantics(instr).high_16bits, 0);
         output = LLVMBuildLoad(ctx->ac.builder, output_addr, "");
         output = LLVMBuildBitCast(ctx->ac.builder, output, ctx->ac.v2f16, "");
         output = LLVMBuildInsertElement(ctx->ac.builder, output, value, index, "");
         value = LLVMBuildBitCast(ctx->ac.builder, output, ctx->ac.f32, "");
      }
      LLVMBuildStore(ctx->ac.builder, value, output_addr);
   }

   if (ctx->ac.postponed_kill)
      ac_build_endif(&ctx->ac, 7002);
}
2377 
/* Number of coordinate components needed to address an image of the given
 * dimensionality: layer index included for arrays, sample index for MS.
 */
static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
{
   switch (dim) {
   case GLSL_SAMPLER_DIM_BUF:
      return 1;
   case GLSL_SAMPLER_DIM_1D:
      return 1 + (array ? 1 : 0);
   case GLSL_SAMPLER_DIM_2D:
      return 2 + (array ? 1 : 0);
   case GLSL_SAMPLER_DIM_MS:
      return 3 + (array ? 1 : 0);
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      return 3;
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_SUBPASS:
      return 2;
   default:
      return 0;
   }
}
2402 
/* Remap a raw sample index through the FMASK (compressed MSAA metadata).
 * With no FMASK descriptor the index is returned unchanged.
 */
static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
                                                    LLVMValueRef coord_x, LLVMValueRef coord_y,
                                                    LLVMValueRef coord_z, LLVMValueRef sample_index,
                                                    LLVMValueRef fmask_desc_ptr)
{
   if (!fmask_desc_ptr)
      return sample_index;

   /* The sample index occupies the channel after the coordinates
    * (channel 3 for arrays, which carry a layer in channel 2).
    */
   const bool is_array = coord_z != NULL;
   const unsigned sample_chan = is_array ? 3 : 2;
   LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};

   addr[sample_chan] = sample_index;
   ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, is_array);
   return addr[sample_chan];
}
2418 
get_image_deref(const nir_intrinsic_instr * instr)2419 static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)
2420 {
2421    assert(instr->src[0].is_ssa);
2422    return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
2423 }
2424 
/* Fetch the hardware descriptor for the image of an image intrinsic. */
static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
                                         const nir_intrinsic_instr *instr,
                                         LLVMValueRef dynamic_index,
                                         enum ac_descriptor_type desc_type, bool write)
{
   nir_deref_instr *deref = NULL;

   /* Bindless handles have no deref chain; src[0] is the handle itself. */
   if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref)
      deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);

   return get_sampler_desc(ctx, deref, desc_type, &instr->instr, dynamic_index, true, write);
}
2436 
/* Fill args->coords[] from the coordinate source (src[1]) of an image
 * intrinsic, applying FMASK sample remapping and GFX9 addressing quirks.
 */
static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
                             LLVMValueRef dynamic_desc_index, struct ac_image_args *args,
                             enum glsl_sampler_dim dim, bool is_array)
{
   LLVMValueRef src0 = get_src(ctx, instr->src[1]);
   LLVMValueRef masks[] = {
      LLVMConstInt(ctx->ac.i32, 0, false),
      LLVMConstInt(ctx->ac.i32, 1, false),
      LLVMConstInt(ctx->ac.i32, 2, false),
      LLVMConstInt(ctx->ac.i32, 3, false),
   };
   LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);

   int count;
   ASSERTED bool add_frag_pos =
      (dim == GLSL_SAMPLER_DIM_SUBPASS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
   bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
   /* GFX9 addresses 1D images as 2D (see the expansion below). */
   bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D;
   assert(!add_frag_pos && "Input attachments should be lowered by this point.");
   count = image_type_to_components_count(dim, is_array);

   /* For multisample loads, remap the sample index through the FMASK. */
   if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
                 instr->intrinsic == nir_intrinsic_bindless_image_load ||
                 instr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                 instr->intrinsic == nir_intrinsic_bindless_image_sparse_load)) {
      LLVMValueRef fmask_load_address[3];

      fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
      fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
      if (is_array)
         fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
      else
         fmask_load_address[2] = NULL;

      sample_index = adjust_sample_index_using_fmask(
         &ctx->ac, fmask_load_address[0], fmask_load_address[1], fmask_load_address[2],
         sample_index, get_image_descriptor(ctx, instr, dynamic_desc_index, AC_DESC_FMASK, false));
   }
   if (count == 1 && !gfx9_1d) {
      /* Single coordinate: extract it if src0 is a vector. */
      if (instr->src[1].ssa->num_components)
         args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
      else
         args->coords[0] = src0;
   } else {
      int chan;
      /* The sample index is appended last, after any GFX9 fixups. */
      if (is_ms)
         count--;
      for (chan = 0; chan < count; ++chan) {
         args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
      }

      /* GFX9 1D images are addressed as 2D with y == 0. */
      if (gfx9_1d) {
         if (is_array) {
            args->coords[2] = args->coords[1];
            args->coords[1] = ctx->ac.i32_0;
         } else
            args->coords[1] = ctx->ac.i32_0;
         count++;
      }
      if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_2D && !is_array) {
         /* The hw can't bind a slice of a 3D image as a 2D
          * image, because it ignores BASE_ARRAY if the target
          * is 3D. The workaround is to read BASE_ARRAY and set
          * it as the 3rd address operand for all 2D images.
          */
         LLVMValueRef first_layer, const5, mask;

         const5 = LLVMConstInt(ctx->ac.i32, 5, 0);
         mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0);
         first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, "");
         first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, "");

         args->coords[count] = first_layer;
         count++;
      }

      if (is_ms) {
         args->coords[count] = sample_index;
         count++;
      }
   }
}
2519 
enter_waterfall_image(struct ac_nir_context * ctx,struct waterfall_context * wctx,const nir_intrinsic_instr * instr)2520 static LLVMValueRef enter_waterfall_image(struct ac_nir_context *ctx,
2521                                           struct waterfall_context *wctx,
2522                                           const nir_intrinsic_instr *instr)
2523 {
2524    nir_deref_instr *deref_instr = NULL;
2525 
2526    if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref)
2527       deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
2528 
2529    LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true);
2530    return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
2531 }
2532 
/* Emit an image load ((bindless_)image_(deref_)(sparse_)load).
 * Buffer images use a buffer load; everything else uses an image opcode.
 */
static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
                                     bool bindless)
{
   LLVMValueRef res;

   enum glsl_sampler_dim dim;
   enum gl_access_qualifier access = nir_intrinsic_access(instr);
   bool is_array;
   if (bindless) {
      dim = nir_intrinsic_image_dim(instr);
      is_array = nir_intrinsic_image_array(instr);
   } else {
      /* For derefs, dim/array/access come from the image variable. */
      const nir_deref_instr *image_deref = get_image_deref(instr);
      const struct glsl_type *type = image_deref->type;
      const nir_variable *var = nir_deref_instr_get_variable(image_deref);
      dim = glsl_get_sampler_dim(type);
      access |= var->data.access;
      is_array = glsl_sampler_type_is_array(type);
   }

   struct waterfall_context wctx;
   LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);

   struct ac_image_args args = {0};

   args.cache_policy = get_cache_policy(ctx, access, false, false);
   /* Sparse loads return an extra residency dword (tfe). */
   args.tfe = instr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
              instr->intrinsic == nir_intrinsic_bindless_image_sparse_load;

   if (dim == GLSL_SAMPLER_DIM_BUF) {
      unsigned num_channels = util_last_bit(nir_ssa_def_components_read(&instr->dest.ssa));
      /* 64-bit buffer loads are done as pairs of dwords. */
      if (instr->dest.ssa.bit_size == 64)
         num_channels = num_channels < 4 ? 2 : 4;
      LLVMValueRef rsrc, vindex;

      rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false);
      vindex =
         LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");

      assert(instr->dest.is_ssa);
      bool can_speculate = access & ACCESS_CAN_REORDER;
      res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels,
                                        args.cache_policy, can_speculate,
                                        instr->dest.ssa.bit_size == 16,
                                        args.tfe);
      /* Pad to vec4 (plus the residency dword with tfe), then trim to
       * what the destination actually uses.
       */
      res = ac_build_expand(&ctx->ac, res, num_channels, args.tfe ? 5 : 4);

      res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
      res = ac_to_integer(&ctx->ac, res);
   } else {
      /* A constant LOD of 0 selects the non-mip opcode. */
      bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;

      args.opcode = level_zero ? ac_image_load : ac_image_load_mip;
      args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
      get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
      args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
      if (!level_zero)
         args.lod = get_src(ctx, instr->src[3]);
      args.dmask = 15;
      args.attributes = AC_FUNC_ATTR_READONLY;

      assert(instr->dest.is_ssa);
      args.d16 = instr->dest.ssa.bit_size == 16;

      res = ac_build_image_opcode(&ctx->ac, &args);
   }

   if (instr->dest.ssa.bit_size == 64) {
      /* Repack the two loaded dwords as one 64-bit value in .x, replicate
       * into a vec4 layout (y = z = 0, w = second value), and re-append the
       * residency code for sparse loads.
       */
      LLVMValueRef code = NULL;
      if (args.tfe) {
         code = ac_llvm_extract_elem(&ctx->ac, res, 4);
         res = ac_trim_vector(&ctx->ac, res, 4);
      }

      res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i64, 2), "");
      LLVMValueRef x = LLVMBuildExtractElement(ctx->ac.builder, res, ctx->ac.i32_0, "");
      LLVMValueRef w = LLVMBuildExtractElement(ctx->ac.builder, res, ctx->ac.i32_1, "");

      if (code)
         code = LLVMBuildZExt(ctx->ac.builder, code, ctx->ac.i64, "");
      LLVMValueRef values[5] = {x, ctx->ac.i64_0, ctx->ac.i64_0, w, code};
      res = ac_build_gather_values(&ctx->ac, values, 4 + args.tfe);
   }

   return exit_waterfall(ctx, &wctx, res);
}
2619 
/* Emit an image store ((bindless_)image_(deref_)store).
 * Buffer images use a buffer store; everything else uses an image opcode.
 */
static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
                              bool bindless)
{
   /* Suppress the side effect for lanes whose kill has been postponed. */
   if (ctx->ac.postponed_kill) {
      LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
      ac_build_ifcc(&ctx->ac, cond, 7003);
   }

   enum glsl_sampler_dim dim;
   enum gl_access_qualifier access = nir_intrinsic_access(instr);
   bool is_array;

   if (bindless) {
      dim = nir_intrinsic_image_dim(instr);
      is_array = nir_intrinsic_image_array(instr);
   } else {
      /* For derefs, dim/array/access come from the image variable. */
      const nir_deref_instr *image_deref = get_image_deref(instr);
      const struct glsl_type *type = image_deref->type;
      const nir_variable *var = nir_deref_instr_get_variable(image_deref);
      dim = glsl_get_sampler_dim(type);
      access |= var->data.access;
      is_array = glsl_sampler_type_is_array(type);
   }

   struct waterfall_context wctx;
   LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);

   bool writeonly_memory = access & ACCESS_NON_READABLE;
   struct ac_image_args args = {0};

   args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);

   LLVMValueRef src = get_src(ctx, instr->src[3]);
   if (instr->src[3].ssa->bit_size == 64) {
      /* only R64_UINT and R64_SINT supported */
      src = ac_llvm_extract_elem(&ctx->ac, src, 0);
      src = LLVMBuildBitCast(ctx->ac.builder, src, ctx->ac.v2f32, "");
   } else {
      src = ac_to_float(&ctx->ac, src);
   }

   if (dim == GLSL_SAMPLER_DIM_BUF) {
      LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, true);
      unsigned src_channels = ac_get_llvm_num_components(src);
      LLVMValueRef vindex;

      /* Buffer stores want vec1/2/4 data; pad vec3 to vec4. */
      if (src_channels == 3)
         src = ac_build_expand_to_vec4(&ctx->ac, src, 3);

      vindex =
         LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");

      ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.cache_policy);
   } else {
      /* A constant LOD of 0 selects the non-mip opcode. */
      bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;

      args.opcode = level_zero ? ac_image_store : ac_image_store_mip;
      args.data[0] = src;
      args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
      get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
      args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
      if (!level_zero)
         args.lod = get_src(ctx, instr->src[4]);
      args.dmask = 15;
      args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;

      ac_build_image_opcode(&ctx->ac, &args);
   }

   exit_waterfall(ctx, &wctx, NULL);
   if (ctx->ac.postponed_kill)
      ac_build_endif(&ctx->ac, 7003);
}
2693 
/* Emit an image atomic ((bindless_)image_(deref_)atomic_*).
 * Buffer images go through the struct.buffer.atomic intrinsics; other
 * images use the image-atomic opcode. Returns the pre-operation value.
 */
static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
                                       bool bindless)
{
   /* Suppress the side effect for lanes whose kill has been postponed. */
   if (ctx->ac.postponed_kill) {
      LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
      ac_build_ifcc(&ctx->ac, cond, 7004);
   }

   LLVMValueRef params[7];
   int param_count = 0;

   bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
                  instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;
   const char *atomic_name;       /* suffix for the buffer intrinsic name */
   char intrinsic_name[64];
   enum ac_atomic_op atomic_subop; /* subop for the image opcode path */
   ASSERTED int length;

   enum glsl_sampler_dim dim;
   bool is_array;
   if (bindless) {
      dim = nir_intrinsic_image_dim(instr);
      is_array = nir_intrinsic_image_array(instr);
   } else {
      const struct glsl_type *type = get_image_deref(instr)->type;
      dim = glsl_get_sampler_dim(type);
      is_array = glsl_sampler_type_is_array(type);
   }

   struct waterfall_context wctx;
   LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);

   /* Map the NIR intrinsic to its name suffix and image-atomic subop. */
   switch (instr->intrinsic) {
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_image_deref_atomic_add:
      atomic_name = "add";
      atomic_subop = ac_atomic_add;
      break;
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imin:
      atomic_name = "smin";
      atomic_subop = ac_atomic_smin;
      break;
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umin:
      atomic_name = "umin";
      atomic_subop = ac_atomic_umin;
      break;
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_image_deref_atomic_imax:
      atomic_name = "smax";
      atomic_subop = ac_atomic_smax;
      break;
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_image_deref_atomic_umax:
      atomic_name = "umax";
      atomic_subop = ac_atomic_umax;
      break;
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_image_deref_atomic_and:
      atomic_name = "and";
      atomic_subop = ac_atomic_and;
      break;
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_image_deref_atomic_or:
      atomic_name = "or";
      atomic_subop = ac_atomic_or;
      break;
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_xor:
      atomic_name = "xor";
      atomic_subop = ac_atomic_xor;
      break;
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_exchange:
      atomic_name = "swap";
      atomic_subop = ac_atomic_swap;
      break;
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_comp_swap:
      atomic_name = "cmpswap";
      atomic_subop = 0; /* not used; cmpswap is handled via args.opcode */
      break;
   case nir_intrinsic_bindless_image_atomic_inc_wrap:
   case nir_intrinsic_image_deref_atomic_inc_wrap: {
      atomic_name = "inc";
      atomic_subop = ac_atomic_inc_wrap;
      break;
   }
   case nir_intrinsic_bindless_image_atomic_dec_wrap:
   case nir_intrinsic_image_deref_atomic_dec_wrap:
      atomic_name = "dec";
      atomic_subop = ac_atomic_dec_wrap;
      break;
   case nir_intrinsic_image_deref_atomic_fmin:
      atomic_name = "fmin";
      atomic_subop = ac_atomic_fmin;
      break;
   case nir_intrinsic_image_deref_atomic_fmax:
      atomic_name = "fmax";
      atomic_subop = ac_atomic_fmax;
      break;
   default:
      abort();
   }

   /* For cmpswap, the comparison value (src[4]) goes first, then the data. */
   if (cmpswap)
      params[param_count++] = get_src(ctx, instr->src[4]);
   params[param_count++] = get_src(ctx, instr->src[3]);

   /* Float min/max operate on FP-typed data. */
   if (atomic_subop == ac_atomic_fmin || atomic_subop == ac_atomic_fmax)
      params[0] = ac_to_float(&ctx->ac, params[0]);

   LLVMValueRef result;
   if (dim == GLSL_SAMPLER_DIM_BUF) {
      params[param_count++] = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, true);
      params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
                                                      ctx->ac.i32_0, ""); /* vindex */
      params[param_count++] = ctx->ac.i32_0;                              /* voffset */
      if (cmpswap && instr->dest.ssa.bit_size == 64) {
         /* 64-bit cmpswap has no buffer intrinsic; use the emulation helper. */
         result = emit_ssbo_comp_swap_64(ctx, params[2], params[3], params[1], params[0], true);
      } else {
         LLVMTypeRef data_type = LLVMTypeOf(params[0]);
         char type[8];

         params[param_count++] = ctx->ac.i32_0; /* soffset */
         params[param_count++] = ctx->ac.i32_0; /* slc */

         ac_build_type_name_for_intr(data_type, type, sizeof(type));
         length = snprintf(intrinsic_name, sizeof(intrinsic_name),
                           "llvm.amdgcn.struct.buffer.atomic.%s.%s",
                           atomic_name, type);

         assert(length < sizeof(intrinsic_name));
         result = ac_build_intrinsic(&ctx->ac, intrinsic_name, LLVMTypeOf(params[0]), params, param_count, 0);
      }
   } else {
      struct ac_image_args args = {0};
      args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;
      args.atomic = atomic_subop;
      args.data[0] = params[0];
      if (cmpswap)
         args.data[1] = params[1];
      args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
      get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
      args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);

      result = ac_build_image_opcode(&ctx->ac, &args);
   }

   result = exit_waterfall(ctx, &wctx, result);
   if (ctx->ac.postponed_kill)
      ac_build_endif(&ctx->ac, 7004);
   return result;
}
2849 
/* NIR image_samples: query the sample count of an MSAA image.
 *
 * Runs between enter_waterfall_image/exit_waterfall so a possibly
 * divergent descriptor index is handled per uniform value.
 */
static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
   struct waterfall_context wctx;
   LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
   LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);

   LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc);
   if (ctx->abi->robust_buffer_access) {
      LLVMValueRef dword1, is_null_descriptor;

      /* Extract the second dword of the descriptor, if it's
       * all zero, then it's a null descriptor.
       */
      dword1 =
         LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, false), "");
      is_null_descriptor = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,
                                         LLVMConstInt(ctx->ac.i32, 0, false), "");
      /* Under robust access a null descriptor reports 0 samples. */
      ret = LLVMBuildSelect(ctx->ac.builder, is_null_descriptor, ctx->ac.i32_0, ret, "");
   }

   return exit_waterfall(ctx, &wctx, ret);
}
2872 
/* NIR image_size: query the dimensions of an image.
 *
 * Buffer images return the element count via the buffer descriptor; all
 * other dims use a resinfo image opcode. Wrapped in a waterfall loop for
 * possibly divergent descriptor indices.
 */
static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
                                     bool bindless)
{
   LLVMValueRef res;

   /* Bindless images carry dim/array info on the intrinsic itself; bound
    * images read it from the dereferenced variable's GLSL type.
    */
   enum glsl_sampler_dim dim;
   bool is_array;
   if (bindless) {
      dim = nir_intrinsic_image_dim(instr);
      is_array = nir_intrinsic_image_array(instr);
   } else {
      const struct glsl_type *type = get_image_deref(instr)->type;
      dim = glsl_get_sampler_dim(type);
      is_array = glsl_sampler_type_is_array(type);
   }

   struct waterfall_context wctx;
   LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);

   if (dim == GLSL_SAMPLER_DIM_BUF) {
      res = get_buffer_size(
         ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true);
   } else {

      struct ac_image_args args = {0};

      args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
      args.dmask = 0xf;
      args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
      args.opcode = ac_image_get_resinfo;
      /* The LOD source must be constant zero; resinfo is queried at LOD 0. */
      assert(nir_src_as_uint(instr->src[1]) == 0);
      args.lod = ctx->ac.i32_0;
      args.attributes = AC_FUNC_ATTR_READNONE;

      res = ac_build_image_opcode(&ctx->ac, &args);

      /* GFX9 1D arrays: resinfo returns the layer count in component 2
       * (1D is handled as 2D there — see ac_get_image_dim); copy it to
       * component 1 where the consumer expects it.
       */
      if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
         LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
         LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
         res = LLVMBuildInsertElement(ctx->ac.builder, res, layers, ctx->ac.i32_1, "");
      }
   }
   return exit_waterfall(ctx, &wctx, res);
}
2917 
emit_membar(struct ac_llvm_context * ac,const nir_intrinsic_instr * instr)2918 static void emit_membar(struct ac_llvm_context *ac, const nir_intrinsic_instr *instr)
2919 {
2920    unsigned wait_flags = 0;
2921 
2922    switch (instr->intrinsic) {
2923    case nir_intrinsic_memory_barrier:
2924    case nir_intrinsic_group_memory_barrier:
2925       wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
2926       break;
2927    case nir_intrinsic_memory_barrier_buffer:
2928    case nir_intrinsic_memory_barrier_image:
2929       wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
2930       break;
2931    case nir_intrinsic_memory_barrier_shared:
2932       wait_flags = AC_WAIT_LGKM;
2933       break;
2934    default:
2935       break;
2936    }
2937 
2938    ac_build_waitcnt(ac, wait_flags);
2939 }
2940 
/* Emit a workgroup execution barrier appropriate for the chip and stage. */
void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
{
   /* GFX6 TCS only (thanks to a hw bug workaround): the real barrier
    * instruction isn't needed, because an entire patch always fits into
    * a single wave; a full memory waitcnt suffices.
    */
   const bool single_wave_patch = ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL;

   if (single_wave_patch) {
      ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
      return;
   }

   ac_build_s_barrier(ac);
}
2953 
emit_discard(struct ac_nir_context * ctx,const nir_intrinsic_instr * instr)2954 static void emit_discard(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
2955 {
2956    LLVMValueRef cond;
2957 
2958    if (instr->intrinsic == nir_intrinsic_discard_if ||
2959        instr->intrinsic == nir_intrinsic_terminate_if) {
2960       cond = LLVMBuildNot(ctx->ac.builder, get_src(ctx, instr->src[0]), "");
2961    } else {
2962       assert(instr->intrinsic == nir_intrinsic_discard ||
2963              instr->intrinsic == nir_intrinsic_terminate);
2964       cond = ctx->ac.i1false;
2965    }
2966 
2967    ac_build_kill_if_false(&ctx->ac, cond);
2968 }
2969 
/* NIR demote/demote_if: turn the invocation into a helper invocation.
 *
 * cond is the "keep the pixel" condition (inverse of the demote condition).
 */
static void emit_demote(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
{
   LLVMValueRef cond;

   if (instr->intrinsic == nir_intrinsic_demote_if) {
      cond = LLVMBuildNot(ctx->ac.builder, get_src(ctx, instr->src[0]), "");
   } else {
      assert(instr->intrinsic == nir_intrinsic_demote);
      cond = ctx->ac.i1false;
   }

   if (LLVM_VERSION_MAJOR >= 13) {
      /* This demotes the pixel if the condition is false. */
      ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.wqm.demote", ctx->ac.voidt, &cond, 1, 0);
      return;
   }

   /* Pre-LLVM-13 fallback: fold the condition into postponed_kill; the
    * actual kill happens later.
    */
   LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
   mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");
   LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);

   if (!ctx->info->fs.needs_all_helper_invocations) {
      /* This is an optional optimization that only kills whole inactive quads.
       * It's not used when subgroup operations can possibly use all helper
       * invocations.
       */
      if (ctx->ac.flow->depth == 0) {
         ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
      } else {
         /* amdgcn.wqm.vote doesn't work inside conditional blocks. Here's why.
          *
          * The problem is that kill(wqm.vote(0)) kills all active threads within
          * the block, which breaks the whole quad mode outside the block if
          * the conditional block has partially active quads (2x2 pixel blocks).
          * E.g. threads 0-3 are active outside the block, but only thread 0 is
          * active inside the block. Thread 0 shouldn't be killed by demote,
          * because threads 1-3 are still active outside the block.
          *
          * The fix for amdgcn.wqm.vote would be to return S_WQM((live & ~exec) | cond)
          * instead of S_WQM(cond).
          *
          * The less efficient workaround we do here is to save the kill condition
          * to a temporary (postponed_kill) and do kill(wqm.vote(cond)) after we
          * exit the conditional block.
          */
         ctx->ac.conditional_demote_seen = true;
      }
   }
}
3019 
visit_load_local_invocation_index(struct ac_nir_context * ctx)3020 static LLVMValueRef visit_load_local_invocation_index(struct ac_nir_context *ctx)
3021 {
3022    if (ctx->args->vs_rel_patch_id.used) {
3023       return ac_get_arg(&ctx->ac, ctx->args->vs_rel_patch_id);
3024    } else if (ctx->args->merged_wave_info.used) {
3025       /* Thread ID in threadgroup in merged ESGS. */
3026       LLVMValueRef wave_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 24, 4);
3027       LLVMValueRef wave_size = LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false);
3028 	   LLVMValueRef threads_before = LLVMBuildMul(ctx->ac.builder, wave_id, wave_size, "");
3029 	   return LLVMBuildAdd(ctx->ac.builder, threads_before, ac_get_thread_id(&ctx->ac), "");
3030    }
3031 
3032    LLVMValueRef result;
3033    LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
3034    result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
3035                          LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
3036 
3037    if (ctx->ac.wave_size == 32)
3038       result = LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 1, false), "");
3039 
3040    return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
3041 }
3042 
visit_load_subgroup_id(struct ac_nir_context * ctx)3043 static LLVMValueRef visit_load_subgroup_id(struct ac_nir_context *ctx)
3044 {
3045    if (ctx->stage == MESA_SHADER_COMPUTE) {
3046       LLVMValueRef result;
3047       result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
3048                             LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
3049       return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
3050    } else if (ctx->args->merged_wave_info.used) {
3051       return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 24, 4);
3052    } else {
3053       return LLVMConstInt(ctx->ac.i32, 0, false);
3054    }
3055 }
3056 
visit_load_num_subgroups(struct ac_nir_context * ctx)3057 static LLVMValueRef visit_load_num_subgroups(struct ac_nir_context *ctx)
3058 {
3059    if (ctx->stage == MESA_SHADER_COMPUTE) {
3060       return LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
3061                           LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
3062    } else {
3063       return LLVMConstInt(ctx->ac.i32, 1, false);
3064    }
3065 }
3066 
visit_first_invocation(struct ac_nir_context * ctx)3067 static LLVMValueRef visit_first_invocation(struct ac_nir_context *ctx)
3068 {
3069    LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
3070    const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
3071 
3072    /* The second argument is whether cttz(0) should be defined, but we do not care. */
3073    LLVMValueRef args[] = {active_set, ctx->ac.i1false};
3074    LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr, ctx->ac.iN_wavemask, args, 2,
3075                                             AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE);
3076 
3077    return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
3078 }
3079 
/* NIR load_shared: read num_components channels from LDS.
 *
 * get_memory_ptr types the pointer to the destination bit size, so each
 * channel is a plain GEP + load, gathered and bitcast to the def type.
 */
static LLVMValueRef visit_load_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
{
   LLVMValueRef values[4], derived_ptr, index, ret;
   unsigned const_off = nir_intrinsic_base(instr);

   LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->dest.ssa.bit_size, const_off);

   for (int chan = 0; chan < instr->num_components; chan++) {
      index = LLVMConstInt(ctx->ac.i32, chan, 0);
      derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
      values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
   }

   ret = ac_build_gather_values(&ctx->ac, values, instr->num_components);

   return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
}
3097 
visit_store_shared(struct ac_nir_context * ctx,const nir_intrinsic_instr * instr)3098 static void visit_store_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
3099 {
3100    LLVMValueRef derived_ptr, data, index;
3101    LLVMBuilderRef builder = ctx->ac.builder;
3102 
3103    unsigned const_off = nir_intrinsic_base(instr);
3104    LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1], instr->src[0].ssa->bit_size, const_off);
3105    LLVMValueRef src = get_src(ctx, instr->src[0]);
3106 
3107    int writemask = nir_intrinsic_write_mask(instr);
3108    for (int chan = 0; chan < 4; chan++) {
3109       if (!(writemask & (1 << chan))) {
3110          continue;
3111       }
3112       data = ac_llvm_extract_elem(&ctx->ac, src, chan);
3113       index = LLVMConstInt(ctx->ac.i32, chan, 0);
3114       derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
3115       LLVMBuildStore(builder, data, derived_ptr);
3116    }
3117 }
3118 
/* Emit an atomic for the nir_intrinsic_shared_atomic_* family on an LDS
 * pointer.
 *
 * src_idx is the index of the data operand in instr->src (comp_swap reads
 * one extra operand at src_idx + 1). When postponed_kill is active, the
 * atomic is wrapped in an if-block so demoted invocations skip it.
 *
 * Fix: the unhandled-op path previously returned NULL without closing the
 * postponed_kill if-block opened by ac_build_ifcc above, leaving
 * unbalanced control flow; it now closes the scope before bailing.
 */
static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
                                     LLVMValueRef ptr, int src_idx)
{
   if (ctx->ac.postponed_kill) {
      LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
      ac_build_ifcc(&ctx->ac, cond, 7005);
   }

   LLVMValueRef result;
   LLVMValueRef src = get_src(ctx, instr->src[src_idx]);

   const char *sync_scope = "workgroup-one-as";

   if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap) {
      LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
      result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope);
      /* cmpxchg yields {old value, success}; NIR wants the old value. */
      result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
   } else if (instr->intrinsic == nir_intrinsic_shared_atomic_fmin ||
              instr->intrinsic == nir_intrinsic_shared_atomic_fmax) {
      /* Float min/max have no LLVMAtomicRMWBinOp; call the amdgcn DS
       * intrinsics directly.
       */
      const char *op = instr->intrinsic == nir_intrinsic_shared_atomic_fmin ? "fmin" : "fmax";
      char name[64], type[8];
      LLVMValueRef params[5];
      LLVMTypeRef src_type;
      int arg_count = 0;

      src = ac_to_float(&ctx->ac, src);
      src_type = LLVMTypeOf(src);

      /* Re-type the LDS pointer to the float type the intrinsic expects. */
      LLVMTypeRef ptr_type =
         LLVMPointerType(src_type, LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)));
      ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type, "");

      params[arg_count++] = ptr;
      params[arg_count++] = src;
      /* NOTE(review): the trailing i32/i32/i1 operands look like the
       * intrinsic's ordering/scope/volatile slots — confirm against
       * LLVM's llvm.amdgcn.ds.fmin/fmax definition.
       */
      params[arg_count++] = ctx->ac.i32_0;
      params[arg_count++] = ctx->ac.i32_0;
      params[arg_count++] = ctx->ac.i1false;

      ac_build_type_name_for_intr(src_type, type, sizeof(type));
      snprintf(name, sizeof(name), "llvm.amdgcn.ds.%s.%s", op, type);

      result = ac_build_intrinsic(&ctx->ac, name, src_type, params, arg_count, 0);
      result = ac_to_integer(&ctx->ac, result);
   } else {
      /* Integer ops (and fadd) map directly onto LLVM atomicrmw. */
      LLVMAtomicRMWBinOp op;
      switch (instr->intrinsic) {
      case nir_intrinsic_shared_atomic_add:
         op = LLVMAtomicRMWBinOpAdd;
         break;
      case nir_intrinsic_shared_atomic_umin:
         op = LLVMAtomicRMWBinOpUMin;
         break;
      case nir_intrinsic_shared_atomic_umax:
         op = LLVMAtomicRMWBinOpUMax;
         break;
      case nir_intrinsic_shared_atomic_imin:
         op = LLVMAtomicRMWBinOpMin;
         break;
      case nir_intrinsic_shared_atomic_imax:
         op = LLVMAtomicRMWBinOpMax;
         break;
      case nir_intrinsic_shared_atomic_and:
         op = LLVMAtomicRMWBinOpAnd;
         break;
      case nir_intrinsic_shared_atomic_or:
         op = LLVMAtomicRMWBinOpOr;
         break;
      case nir_intrinsic_shared_atomic_xor:
         op = LLVMAtomicRMWBinOpXor;
         break;
      case nir_intrinsic_shared_atomic_exchange:
         op = LLVMAtomicRMWBinOpXchg;
         break;
      case nir_intrinsic_shared_atomic_fadd:
         op = LLVMAtomicRMWBinOpFAdd;
         break;
      default:
         /* Unknown op: close the postponed_kill scope before bailing so we
          * don't leave an unterminated if-block behind.
          */
         if (ctx->ac.postponed_kill)
            ac_build_endif(&ctx->ac, 7005);
         return NULL;
      }

      LLVMValueRef val;

      if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd) {
         /* fadd needs a float-typed value and pointer. */
         val = ac_to_float(&ctx->ac, src);

         LLVMTypeRef ptr_type =
            LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)));
         ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type, "");
      } else {
         val = ac_to_integer(&ctx->ac, src);
      }

      result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope);

      /* fadd returns float; convert back to the integer bits NIR expects. */
      if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd ||
          instr->intrinsic == nir_intrinsic_deref_atomic_fadd) {
         result = ac_to_integer(&ctx->ac, result);
      }
   }

   if (ctx->ac.postponed_kill)
      ac_build_endif(&ctx->ac, 7005);
   return result;
}
3223 
load_sample_pos(struct ac_nir_context * ctx)3224 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
3225 {
3226    LLVMValueRef values[2];
3227    LLVMValueRef pos[2];
3228 
3229    pos[0] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]));
3230    pos[1] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]));
3231 
3232    values[0] = ac_build_fract(&ctx->ac, pos[0], 32);
3233    values[1] = ac_build_fract(&ctx->ac, pos[1], 32);
3234    return ac_build_gather_values(&ctx->ac, values, 2);
3235 }
3236 
lookup_interp_param(struct ac_nir_context * ctx,enum glsl_interp_mode interp,unsigned location)3237 static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx, enum glsl_interp_mode interp,
3238                                         unsigned location)
3239 {
3240    switch (interp) {
3241    case INTERP_MODE_FLAT:
3242    default:
3243       return NULL;
3244    case INTERP_MODE_SMOOTH:
3245    case INTERP_MODE_NONE:
3246       if (location == INTERP_CENTER)
3247          return ac_get_arg(&ctx->ac, ctx->args->persp_center);
3248       else if (location == INTERP_CENTROID)
3249          return ctx->abi->persp_centroid;
3250       else if (location == INTERP_SAMPLE)
3251          return ac_get_arg(&ctx->ac, ctx->args->persp_sample);
3252       break;
3253    case INTERP_MODE_NOPERSPECTIVE:
3254       if (location == INTERP_CENTER)
3255          return ac_get_arg(&ctx->ac, ctx->args->linear_center);
3256       else if (location == INTERP_CENTROID)
3257          return ctx->abi->linear_centroid;
3258       else if (location == INTERP_SAMPLE)
3259          return ac_get_arg(&ctx->ac, ctx->args->linear_sample);
3260       break;
3261    }
3262    return NULL;
3263 }
3264 
barycentric_center(struct ac_nir_context * ctx,unsigned mode)3265 static LLVMValueRef barycentric_center(struct ac_nir_context *ctx, unsigned mode)
3266 {
3267    LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
3268    return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3269 }
3270 
/* i/j barycentrics at (pixel center + offset), computed by extrapolating
 * the center barycentrics with their screen-space derivatives.
 */
static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx, unsigned mode,
                                       LLVMValueRef offset)
{
   LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
   LLVMValueRef src_c0 =
      ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_0, ""));
   LLVMValueRef src_c1 =
      ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_1, ""));

   LLVMValueRef ij_out[2];
   LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);

   /*
    * take the I then J parameters, and the DDX/Y for it, and
    * calculate the IJ inputs for the interpolator.
    * temp1 = ddx * offset/sample.x + I;
    * interp_param.I = ddy * offset/sample.y + temp1;
    * temp1 = ddx * offset/sample.x + J;
    * interp_param.J = ddy * offset/sample.y + temp1;
    */
   for (unsigned i = 0; i < 2; i++) {
      /* ddxy_out packs [ddx.i, ddx.j, ddy.i, ddy.j]: x-derivative at
       * element i, y-derivative at element i + 2.
       */
      LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
      LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
      LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, ix_ll, "");
      LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, iy_ll, "");
      LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ix_ll, "");
      LLVMValueRef temp1, temp2;

      interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el, ctx->ac.f32, "");

      temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el);
      temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);

      ij_out[i] = LLVMBuildBitCast(ctx->ac.builder, temp2, ctx->ac.i32, "");
   }
   interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
   return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
}
3309 
barycentric_centroid(struct ac_nir_context * ctx,unsigned mode)3310 static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx, unsigned mode)
3311 {
3312    LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTROID);
3313    return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3314 }
3315 
/* i/j barycentrics at an arbitrary sample position: look up the sample's
 * position, re-center it around the pixel center, and reuse the offset path.
 */
static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx, unsigned mode,
                                          LLVMValueRef sample_id)
{
   if (ctx->abi->interp_at_sample_force_center)
      return barycentric_center(ctx, mode);

   LLVMBuilderRef builder = ctx->ac.builder;
   LLVMValueRef half = LLVMConstReal(ctx->ac.f32, 0.5f);

   /* Fetch the position of the requested sample. */
   LLVMValueRef sample_pos = ctx->abi->load_sample_position(ctx->abi, sample_id);

   /* Offset from the pixel center = sample position - 0.5 per axis. */
   LLVMValueRef coords[2];
   for (unsigned axis = 0; axis < 2; axis++) {
      LLVMValueRef c = LLVMBuildExtractElement(builder, sample_pos,
                                               LLVMConstInt(ctx->ac.i32, axis, false), "");
      coords[axis] = LLVMBuildFSub(builder, c, half, "");
   }

   return barycentric_offset(ctx, mode, ac_build_gather_values(&ctx->ac, coords, 2));
}
3336 
barycentric_sample(struct ac_nir_context * ctx,unsigned mode)3337 static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx, unsigned mode)
3338 {
3339    LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_SAMPLE);
3340    return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3341 }
3342 
barycentric_model(struct ac_nir_context * ctx)3343 static LLVMValueRef barycentric_model(struct ac_nir_context *ctx)
3344 {
3345    return LLVMBuildBitCast(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->pull_model),
3346                            ctx->ac.v3i32, "");
3347 }
3348 
/* Interpolate a fragment-shader input at the given i/j barycentrics.
 *
 * index selects the attribute, comp_start the first channel; bitsize must
 * be 16 or 32; high_16bits selects the upper half of a packed 16-bit slot.
 */
static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx, LLVMValueRef interp_param,
                                            unsigned index, unsigned comp_start,
                                            unsigned num_components, unsigned bitsize,
                                            bool high_16bits)
{
   LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
   LLVMValueRef interp_param_f;

   interp_param_f = LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2f32, "");
   LLVMValueRef i = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");
   LLVMValueRef j = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");

   /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */
   if (ctx->verified_interp && !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {
      LLVMValueRef cond = ac_build_is_inf_or_nan(&ctx->ac, i);
      ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));
      /* Remember this interp_param so each set of coeffs is checked only once. */
      _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);
   }

   LLVMValueRef values[4];
   assert(bitsize == 16 || bitsize == 32);
   for (unsigned comp = 0; comp < num_components; comp++) {
      LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, comp_start + comp, false);
      if (bitsize == 16) {
         values[comp] = ac_build_fs_interp_f16(&ctx->ac, llvm_chan, attr_number,
                                               ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j,
                                               high_16bits);
      } else {
         values[comp] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
                                           ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
      }
   }

   return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
}
3384 
/* Handle NIR load_input/load_output-style intrinsics (16/32-bit only;
 * 64-bit IO must have been lowered beforehand).
 *
 * Dispatches by stage: TCS (and TES inputs) go through the tess ABI,
 * GS inputs through load_inputs with a constant vertex index, FS outputs
 * may be an FB-fetch, VS inputs through load_inputs, VS/TES outputs come
 * from the output temporaries, and what remains is a flat-read FS input.
 */
static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *instr,
                               bool is_output)
{
   LLVMValueRef values[8];
   LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
   LLVMTypeRef component_type;
   unsigned base = nir_intrinsic_base(instr);
   unsigned component = nir_intrinsic_component(instr);
   unsigned count = instr->dest.ssa.num_components;
   nir_src *vertex_index_src = nir_get_io_arrayed_index_src(instr);
   LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
   nir_src offset = *nir_get_io_offset_src(instr);
   LLVMValueRef indir_index = NULL;

   switch (instr->dest.ssa.bit_size) {
   case 16:
   case 32:
      break;
   case 64:
      unreachable("64-bit IO should have been lowered");
      return NULL;
   default:
      unreachable("unhandled load type");
      return NULL;
   }

   if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
      component_type = LLVMGetElementType(dest_type);
   else
      component_type = dest_type;

   /* A constant offset must be zero; anything else is an indirect index. */
   if (nir_src_is_const(offset))
      assert(nir_src_as_uint(offset) == 0);
   else
      indir_index = get_src(ctx, offset);

   if (ctx->stage == MESA_SHADER_TESS_CTRL ||
       (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
      /* Tell the ABI when the vertex index is literally gl_InvocationID. */
      bool vertex_index_is_invoc_id =
         vertex_index_src &&
         vertex_index_src->ssa->parent_instr->type == nir_instr_type_intrinsic &&
         nir_instr_as_intrinsic(vertex_index_src->ssa->parent_instr)->intrinsic ==
         nir_intrinsic_load_invocation_id;

      LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
                                                         vertex_index, indir_index,
                                                         base, component,
                                                         count, !is_output,
                                                         vertex_index_is_invoc_id);
      if (instr->dest.ssa.bit_size == 16) {
         result = ac_to_integer(&ctx->ac, result);
         result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
      }
      return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
   }

   /* No indirect indexing is allowed after this point. */
   assert(!indir_index);

   if (ctx->stage == MESA_SHADER_GEOMETRY) {
      assert(nir_src_is_const(*vertex_index_src));

      return ctx->abi->load_inputs(ctx->abi, base, component, count,
                                   nir_src_as_uint(*vertex_index_src), component_type);
   }

   if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
       nir_intrinsic_io_semantics(instr).fb_fetch_output)
      return ctx->abi->emit_fbfetch(ctx->abi);

   if (ctx->stage == MESA_SHADER_VERTEX && !is_output)
      return ctx->abi->load_inputs(ctx->abi, base, component, count, 0, component_type);

   /* Other non-fragment cases have outputs in temporaries. */
   if (is_output && (ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL)) {
      assert(is_output);

      for (unsigned chan = component; chan < count + component; chan++)
         values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->abi->outputs[base * 4 + chan], "");

      LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component);
      return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
   }

   /* Fragment shader inputs, read flat via fs_interp_mov. */
   assert(ctx->stage == MESA_SHADER_FRAGMENT);
   unsigned vertex_id = 2; /* P0 */

   if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
      /* Remap the API vertex index to the hw parameter slot (0 -> P0). */
      nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);

      switch (src0[0].i32) {
      case 0:
         vertex_id = 2;
         break;
      case 1:
         vertex_id = 0;
         break;
      case 2:
         vertex_id = 1;
         break;
      default:
         unreachable("Invalid vertex index");
      }
   }

   LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);

   for (unsigned chan = 0; chan < count; chan++) {
      LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
      values[chan] =
         ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, vertex_id, false), llvm_chan,
                                attr_number, ac_get_arg(&ctx->ac, ctx->args->prim_mask));
      values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
      /* Packed 16-bit inputs: select the requested half before truncating. */
      if (instr->dest.ssa.bit_size == 16 &&
          nir_intrinsic_io_semantics(instr).high_16bits)
         values[chan] = LLVMBuildLShr(ctx->ac.builder, values[chan], LLVMConstInt(ctx->ac.i32, 16, 0), "");
      values[chan] =
         LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],
                                 instr->dest.ssa.bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, "");
   }

   LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, count);
   return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
}
3510 
3511 static LLVMValueRef
emit_load_frag_shading_rate(struct ac_nir_context * ctx)3512 emit_load_frag_shading_rate(struct ac_nir_context *ctx)
3513 {
3514    LLVMValueRef x_rate, y_rate, cond;
3515 
3516    /* VRS Rate X = Ancillary[2:3]
3517     * VRS Rate Y = Ancillary[4:5]
3518     */
3519    x_rate = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 2, 2);
3520    y_rate = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 4, 2);
3521 
3522    /* xRate = xRate == 0x1 ? Horizontal2Pixels : None. */
3523    cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, x_rate, ctx->ac.i32_1, "");
3524    x_rate = LLVMBuildSelect(ctx->ac.builder, cond,
3525                             LLVMConstInt(ctx->ac.i32, 4, false), ctx->ac.i32_0, "");
3526 
3527    /* yRate = yRate == 0x1 ? Vertical2Pixels : None. */
3528    cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, y_rate, ctx->ac.i32_1, "");
3529    y_rate = LLVMBuildSelect(ctx->ac.builder, cond,
3530                             LLVMConstInt(ctx->ac.i32, 1, false), ctx->ac.i32_0, "");
3531 
3532    return LLVMBuildOr(ctx->ac.builder, x_rate, y_rate, "");
3533 }
3534 
3535 static LLVMValueRef
emit_load_frag_coord(struct ac_nir_context * ctx)3536 emit_load_frag_coord(struct ac_nir_context *ctx)
3537 {
3538    LLVMValueRef values[4] = {
3539       ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
3540       ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
3541       ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
3542 
3543    return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
3544 }
3545 
visit_intrinsic(struct ac_nir_context * ctx,nir_intrinsic_instr * instr)3546 static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
3547 {
3548    LLVMValueRef result = NULL;
3549 
3550    switch (instr->intrinsic) {
3551    case nir_intrinsic_ballot:
3552       result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
3553       if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
3554          result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
3555       break;
3556    case nir_intrinsic_read_invocation:
3557       result =
3558          ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
3559       break;
3560    case nir_intrinsic_read_first_invocation:
3561       result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
3562       break;
3563    case nir_intrinsic_load_subgroup_invocation:
3564       result = ac_get_thread_id(&ctx->ac);
3565       break;
3566    case nir_intrinsic_load_workgroup_id: {
3567       LLVMValueRef values[3];
3568 
3569       for (int i = 0; i < 3; i++) {
3570          values[i] = ctx->args->workgroup_ids[i].used
3571                         ? ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i])
3572                         : ctx->ac.i32_0;
3573       }
3574 
3575       result = ac_build_gather_values(&ctx->ac, values, 3);
3576       break;
3577    }
3578    case nir_intrinsic_load_base_vertex:
3579    case nir_intrinsic_load_first_vertex:
3580       result = ctx->abi->load_base_vertex(ctx->abi,
3581                                           instr->intrinsic == nir_intrinsic_load_base_vertex);
3582       break;
3583    case nir_intrinsic_load_workgroup_size:
3584       result = ctx->abi->load_local_group_size(ctx->abi);
3585       break;
3586    case nir_intrinsic_load_vertex_id:
3587       result = LLVMBuildAdd(ctx->ac.builder,
3588                             ctx->vertex_id_replaced ? ctx->vertex_id_replaced :
3589                                                       ac_get_arg(&ctx->ac, ctx->args->vertex_id),
3590                             ac_get_arg(&ctx->ac, ctx->args->base_vertex), "");
3591       break;
3592    case nir_intrinsic_load_vertex_id_zero_base: {
3593       result = ctx->vertex_id_replaced ? ctx->vertex_id_replaced : ctx->abi->vertex_id;
3594       break;
3595    }
3596    case nir_intrinsic_load_local_invocation_id: {
3597       LLVMValueRef ids = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
3598 
3599       if (LLVMGetTypeKind(LLVMTypeOf(ids)) == LLVMIntegerTypeKind) {
3600          /* Thread IDs are packed in VGPR0, 10 bits per component. */
3601          LLVMValueRef id[3];
3602 
3603          for (unsigned i = 0; i < 3; i++)
3604             id[i] = ac_unpack_param(&ctx->ac, ids, i * 10, 10);
3605 
3606          result = ac_build_gather_values(&ctx->ac, id, 3);
3607       } else {
3608          result = ids;
3609       }
3610       break;
3611    }
3612    case nir_intrinsic_load_base_instance:
3613       result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
3614       break;
3615    case nir_intrinsic_load_draw_id:
3616       result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
3617       break;
3618    case nir_intrinsic_load_view_index:
3619       result = ac_get_arg(&ctx->ac, ctx->args->view_index);
3620       break;
3621    case nir_intrinsic_load_invocation_id:
3622       if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3623          result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5);
3624       } else {
3625          if (ctx->ac.chip_class >= GFX10) {
3626             result =
3627                LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
3628                             LLVMConstInt(ctx->ac.i32, 127, 0), "");
3629          } else {
3630             result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
3631          }
3632       }
3633       break;
3634    case nir_intrinsic_load_primitive_id:
3635       if (ctx->stage == MESA_SHADER_GEOMETRY) {
3636          result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
3637       } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3638          result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
3639       } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
3640          result = ctx->tes_patch_id_replaced ? ctx->tes_patch_id_replaced
3641                                              : ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
3642       } else
3643          fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
3644       break;
3645    case nir_intrinsic_load_sample_id:
3646       result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4);
3647       break;
3648    case nir_intrinsic_load_sample_pos:
3649       result = load_sample_pos(ctx);
3650       break;
3651    case nir_intrinsic_load_sample_mask_in:
3652       result = ctx->abi->load_sample_mask_in(ctx->abi);
3653       break;
3654    case nir_intrinsic_load_frag_coord:
3655       result = emit_load_frag_coord(ctx);
3656       break;
3657    case nir_intrinsic_load_frag_shading_rate:
3658       result = emit_load_frag_shading_rate(ctx);
3659       break;
3660    case nir_intrinsic_load_front_face:
3661       result = emit_i2b(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->front_face));
3662       break;
3663    case nir_intrinsic_load_helper_invocation:
3664       result = ac_build_load_helper_invocation(&ctx->ac);
3665       break;
3666    case nir_intrinsic_is_helper_invocation:
3667       result = ac_build_is_helper_invocation(&ctx->ac);
3668       break;
3669    case nir_intrinsic_load_color0:
3670       result = ctx->abi->color0;
3671       break;
3672    case nir_intrinsic_load_color1:
3673       result = ctx->abi->color1;
3674       break;
3675    case nir_intrinsic_load_user_data_amd:
3676       assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
3677       result = ctx->abi->user_data;
3678       break;
3679    case nir_intrinsic_load_instance_id:
3680       result = ctx->instance_id_replaced ? ctx->instance_id_replaced : ctx->abi->instance_id;
3681       break;
3682    case nir_intrinsic_load_num_workgroups:
3683       if (ctx->abi->load_grid_size_from_user_sgpr) {
3684          result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
3685       } else {
3686          LLVMTypeRef ptr_type = ac_array_in_const_addr_space(ctx->ac.v3i32);
3687          LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
3688          ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type, "");
3689          result = ac_build_load_invariant(&ctx->ac, ptr, ctx->ac.i32_0);
3690       }
3691       break;
3692    case nir_intrinsic_load_local_invocation_index:
3693       result = visit_load_local_invocation_index(ctx);
3694       break;
3695    case nir_intrinsic_load_subgroup_id:
3696       result = visit_load_subgroup_id(ctx);
3697       break;
3698    case nir_intrinsic_load_num_subgroups:
3699       result = visit_load_num_subgroups(ctx);
3700       break;
3701    case nir_intrinsic_first_invocation:
3702       result = visit_first_invocation(ctx);
3703       break;
3704    case nir_intrinsic_load_push_constant:
3705       result = visit_load_push_constant(ctx, instr);
3706       break;
3707    case nir_intrinsic_store_ssbo:
3708       visit_store_ssbo(ctx, instr);
3709       break;
3710    case nir_intrinsic_load_ssbo:
3711       result = visit_load_buffer(ctx, instr);
3712       break;
3713    case nir_intrinsic_load_global_constant:
3714    case nir_intrinsic_load_global:
3715       result = visit_load_global(ctx, instr);
3716       break;
3717    case nir_intrinsic_store_global:
3718       visit_store_global(ctx, instr);
3719       break;
3720    case nir_intrinsic_global_atomic_add:
3721    case nir_intrinsic_global_atomic_imin:
3722    case nir_intrinsic_global_atomic_umin:
3723    case nir_intrinsic_global_atomic_imax:
3724    case nir_intrinsic_global_atomic_umax:
3725    case nir_intrinsic_global_atomic_and:
3726    case nir_intrinsic_global_atomic_or:
3727    case nir_intrinsic_global_atomic_xor:
3728    case nir_intrinsic_global_atomic_exchange:
3729    case nir_intrinsic_global_atomic_comp_swap:
3730    case nir_intrinsic_global_atomic_fmin:
3731    case nir_intrinsic_global_atomic_fmax:
3732       result = visit_global_atomic(ctx, instr);
3733       break;
3734    case nir_intrinsic_ssbo_atomic_add:
3735    case nir_intrinsic_ssbo_atomic_imin:
3736    case nir_intrinsic_ssbo_atomic_umin:
3737    case nir_intrinsic_ssbo_atomic_imax:
3738    case nir_intrinsic_ssbo_atomic_umax:
3739    case nir_intrinsic_ssbo_atomic_and:
3740    case nir_intrinsic_ssbo_atomic_or:
3741    case nir_intrinsic_ssbo_atomic_xor:
3742    case nir_intrinsic_ssbo_atomic_exchange:
3743    case nir_intrinsic_ssbo_atomic_comp_swap:
3744    case nir_intrinsic_ssbo_atomic_fmin:
3745    case nir_intrinsic_ssbo_atomic_fmax:
3746       result = visit_atomic_ssbo(ctx, instr);
3747       break;
3748    case nir_intrinsic_load_ubo:
3749       result = visit_load_ubo_buffer(ctx, instr);
3750       break;
3751    case nir_intrinsic_get_ssbo_size:
3752       result = visit_get_ssbo_size(ctx, instr);
3753       break;
3754    case nir_intrinsic_load_input:
3755    case nir_intrinsic_load_input_vertex:
3756    case nir_intrinsic_load_per_vertex_input:
3757       result = visit_load(ctx, instr, false);
3758       break;
3759    case nir_intrinsic_load_output:
3760    case nir_intrinsic_load_per_vertex_output:
3761       result = visit_load(ctx, instr, true);
3762       break;
3763    case nir_intrinsic_store_output:
3764    case nir_intrinsic_store_per_vertex_output:
3765       visit_store_output(ctx, instr);
3766       break;
3767    case nir_intrinsic_load_shared:
3768       result = visit_load_shared(ctx, instr);
3769       break;
3770    case nir_intrinsic_store_shared:
3771       visit_store_shared(ctx, instr);
3772       break;
3773    case nir_intrinsic_bindless_image_samples:
3774    case nir_intrinsic_image_deref_samples:
3775       result = visit_image_samples(ctx, instr);
3776       break;
3777    case nir_intrinsic_bindless_image_load:
3778    case nir_intrinsic_bindless_image_sparse_load:
3779       result = visit_image_load(ctx, instr, true);
3780       break;
3781    case nir_intrinsic_image_deref_load:
3782    case nir_intrinsic_image_deref_sparse_load:
3783       result = visit_image_load(ctx, instr, false);
3784       break;
3785    case nir_intrinsic_bindless_image_store:
3786       visit_image_store(ctx, instr, true);
3787       break;
3788    case nir_intrinsic_image_deref_store:
3789       visit_image_store(ctx, instr, false);
3790       break;
3791    case nir_intrinsic_bindless_image_atomic_add:
3792    case nir_intrinsic_bindless_image_atomic_imin:
3793    case nir_intrinsic_bindless_image_atomic_umin:
3794    case nir_intrinsic_bindless_image_atomic_imax:
3795    case nir_intrinsic_bindless_image_atomic_umax:
3796    case nir_intrinsic_bindless_image_atomic_and:
3797    case nir_intrinsic_bindless_image_atomic_or:
3798    case nir_intrinsic_bindless_image_atomic_xor:
3799    case nir_intrinsic_bindless_image_atomic_exchange:
3800    case nir_intrinsic_bindless_image_atomic_comp_swap:
3801    case nir_intrinsic_bindless_image_atomic_inc_wrap:
3802    case nir_intrinsic_bindless_image_atomic_dec_wrap:
3803       result = visit_image_atomic(ctx, instr, true);
3804       break;
3805    case nir_intrinsic_image_deref_atomic_add:
3806    case nir_intrinsic_image_deref_atomic_imin:
3807    case nir_intrinsic_image_deref_atomic_umin:
3808    case nir_intrinsic_image_deref_atomic_imax:
3809    case nir_intrinsic_image_deref_atomic_umax:
3810    case nir_intrinsic_image_deref_atomic_and:
3811    case nir_intrinsic_image_deref_atomic_or:
3812    case nir_intrinsic_image_deref_atomic_xor:
3813    case nir_intrinsic_image_deref_atomic_exchange:
3814    case nir_intrinsic_image_deref_atomic_comp_swap:
3815    case nir_intrinsic_image_deref_atomic_inc_wrap:
3816    case nir_intrinsic_image_deref_atomic_dec_wrap:
3817    case nir_intrinsic_image_deref_atomic_fmin:
3818    case nir_intrinsic_image_deref_atomic_fmax:
3819       result = visit_image_atomic(ctx, instr, false);
3820       break;
3821    case nir_intrinsic_bindless_image_size:
3822       result = visit_image_size(ctx, instr, true);
3823       break;
3824    case nir_intrinsic_image_deref_size:
3825       result = visit_image_size(ctx, instr, false);
3826       break;
3827    case nir_intrinsic_shader_clock:
3828       result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr));
3829       break;
3830    case nir_intrinsic_discard:
3831    case nir_intrinsic_discard_if:
3832    case nir_intrinsic_terminate:
3833    case nir_intrinsic_terminate_if:
3834       emit_discard(ctx, instr);
3835       break;
3836    case nir_intrinsic_demote:
3837    case nir_intrinsic_demote_if:
3838       emit_demote(ctx, instr);
3839       break;
3840    case nir_intrinsic_memory_barrier:
3841    case nir_intrinsic_group_memory_barrier:
3842    case nir_intrinsic_memory_barrier_buffer:
3843    case nir_intrinsic_memory_barrier_image:
3844    case nir_intrinsic_memory_barrier_shared:
3845       emit_membar(&ctx->ac, instr);
3846       break;
3847    case nir_intrinsic_scoped_barrier: {
3848       assert(!(nir_intrinsic_memory_semantics(instr) &
3849                (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
3850 
3851       nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
3852 
3853       unsigned wait_flags = 0;
3854       if (modes & (nir_var_mem_global | nir_var_mem_ssbo | nir_var_image))
3855          wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
3856       if (modes & nir_var_mem_shared)
3857          wait_flags |= AC_WAIT_LGKM;
3858 
3859       if (wait_flags)
3860          ac_build_waitcnt(&ctx->ac, wait_flags);
3861 
3862       if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
3863          ac_emit_barrier(&ctx->ac, ctx->stage);
3864       break;
3865    }
3866    case nir_intrinsic_memory_barrier_tcs_patch:
3867       break;
3868    case nir_intrinsic_control_barrier:
3869       ac_emit_barrier(&ctx->ac, ctx->stage);
3870       break;
3871    case nir_intrinsic_shared_atomic_add:
3872    case nir_intrinsic_shared_atomic_imin:
3873    case nir_intrinsic_shared_atomic_umin:
3874    case nir_intrinsic_shared_atomic_imax:
3875    case nir_intrinsic_shared_atomic_umax:
3876    case nir_intrinsic_shared_atomic_and:
3877    case nir_intrinsic_shared_atomic_or:
3878    case nir_intrinsic_shared_atomic_xor:
3879    case nir_intrinsic_shared_atomic_exchange:
3880    case nir_intrinsic_shared_atomic_comp_swap:
3881    case nir_intrinsic_shared_atomic_fadd:
3882    case nir_intrinsic_shared_atomic_fmin:
3883    case nir_intrinsic_shared_atomic_fmax: {
3884       LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size, 0);
3885       result = visit_var_atomic(ctx, instr, ptr, 1);
3886       break;
3887    }
3888    case nir_intrinsic_deref_atomic_add:
3889    case nir_intrinsic_deref_atomic_imin:
3890    case nir_intrinsic_deref_atomic_umin:
3891    case nir_intrinsic_deref_atomic_imax:
3892    case nir_intrinsic_deref_atomic_umax:
3893    case nir_intrinsic_deref_atomic_and:
3894    case nir_intrinsic_deref_atomic_or:
3895    case nir_intrinsic_deref_atomic_xor:
3896    case nir_intrinsic_deref_atomic_exchange:
3897    case nir_intrinsic_deref_atomic_comp_swap:
3898    case nir_intrinsic_deref_atomic_fadd: {
3899       LLVMValueRef ptr = get_src(ctx, instr->src[0]);
3900       result = visit_var_atomic(ctx, instr, ptr, 1);
3901       break;
3902    }
3903    case nir_intrinsic_load_barycentric_pixel:
3904       result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
3905       break;
3906    case nir_intrinsic_load_barycentric_centroid:
3907       result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
3908       break;
3909    case nir_intrinsic_load_barycentric_sample:
3910       result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
3911       break;
3912    case nir_intrinsic_load_barycentric_model:
3913       result = barycentric_model(ctx);
3914       break;
3915    case nir_intrinsic_load_barycentric_at_offset: {
3916       LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
3917       result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
3918       break;
3919    }
3920    case nir_intrinsic_load_barycentric_at_sample: {
3921       LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
3922       result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
3923       break;
3924    }
3925    case nir_intrinsic_load_interpolated_input: {
3926       /* We assume any indirect loads have been lowered away */
3927       ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
3928       assert(offset);
3929       assert(offset[0].i32 == 0);
3930 
3931       LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
3932       unsigned index = nir_intrinsic_base(instr);
3933       unsigned component = nir_intrinsic_component(instr);
3934       result = load_interpolated_input(ctx, interp_param, index, component,
3935                                        instr->dest.ssa.num_components, instr->dest.ssa.bit_size,
3936                                        nir_intrinsic_io_semantics(instr).high_16bits);
3937       break;
3938    }
3939    case nir_intrinsic_emit_vertex:
3940       ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
3941       break;
3942    case nir_intrinsic_emit_vertex_with_counter: {
3943       unsigned stream = nir_intrinsic_stream_id(instr);
3944       LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
3945       ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);
3946       break;
3947    }
3948    case nir_intrinsic_end_primitive:
3949    case nir_intrinsic_end_primitive_with_counter:
3950       ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
3951       break;
3952    case nir_intrinsic_load_tess_coord: {
3953       LLVMValueRef coord[] = {
3954          ctx->tes_u_replaced ? ctx->tes_u_replaced : ac_get_arg(&ctx->ac, ctx->args->tes_u),
3955          ctx->tes_v_replaced ? ctx->tes_v_replaced : ac_get_arg(&ctx->ac, ctx->args->tes_v),
3956          ctx->ac.f32_0,
3957       };
3958 
3959       /* For triangles, the vector should be (u, v, 1-u-v). */
3960       if (ctx->info->tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES) {
3961          coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
3962                                   LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), "");
3963       }
3964       result = ac_build_gather_values(&ctx->ac, coord, 3);
3965       break;
3966    }
3967    case nir_intrinsic_load_tess_level_outer:
3968       result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
3969       break;
3970    case nir_intrinsic_load_tess_level_inner:
3971       result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
3972       break;
3973    case nir_intrinsic_load_tess_level_outer_default:
3974       result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
3975       break;
3976    case nir_intrinsic_load_tess_level_inner_default:
3977       result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
3978       break;
3979    case nir_intrinsic_load_patch_vertices_in:
3980       result = ctx->abi->load_patch_vertices_in(ctx->abi);
3981       break;
3982    case nir_intrinsic_load_tess_rel_patch_id_amd:
3983       if (ctx->stage == MESA_SHADER_TESS_CTRL)
3984          result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 0, 8);
3985       else if (ctx->stage == MESA_SHADER_TESS_EVAL)
3986          result = ctx->tes_rel_patch_id_replaced ? ctx->tes_rel_patch_id_replaced
3987                                                  : ac_get_arg(&ctx->ac, ctx->args->tes_rel_patch_id);
3988       else
3989          unreachable("tess_rel_patch_id_amd is only supported by tessellation shaders");
3990       break;
3991    case nir_intrinsic_load_ring_tess_factors_amd:
3992       result = ctx->abi->load_ring_tess_factors(ctx->abi);
3993       break;
3994    case nir_intrinsic_load_ring_tess_factors_offset_amd:
3995       result = ac_get_arg(&ctx->ac, ctx->args->tcs_factor_offset);
3996       break;
3997    case nir_intrinsic_load_ring_tess_offchip_amd:
3998       result = ctx->abi->load_ring_tess_offchip(ctx->abi);
3999       break;
4000    case nir_intrinsic_load_ring_tess_offchip_offset_amd:
4001       result = ac_get_arg(&ctx->ac, ctx->args->tess_offchip_offset);
4002       break;
4003    case nir_intrinsic_load_ring_esgs_amd:
4004       result = ctx->abi->load_ring_esgs(ctx->abi);
4005       break;
4006    case nir_intrinsic_load_ring_es2gs_offset_amd:
4007       result = ac_get_arg(&ctx->ac, ctx->args->es2gs_offset);
4008       break;
4009    case nir_intrinsic_load_gs_vertex_offset_amd:
4010       result = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[nir_intrinsic_base(instr)]);
4011       break;
4012    case nir_intrinsic_vote_all: {
4013       result = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
4014       break;
4015    }
4016    case nir_intrinsic_vote_any: {
4017       result = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
4018       break;
4019    }
4020    case nir_intrinsic_shuffle:
4021       if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ||
4022           (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
4023          result =
4024             ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
4025       } else {
4026          LLVMValueRef src = get_src(ctx, instr->src[0]);
4027          LLVMValueRef index = get_src(ctx, instr->src[1]);
4028          LLVMTypeRef type = LLVMTypeOf(src);
4029          struct waterfall_context wctx;
4030          LLVMValueRef index_val;
4031 
4032          index_val = enter_waterfall(ctx, &wctx, index, true);
4033 
4034          src = LLVMBuildZExt(ctx->ac.builder, src, ctx->ac.i32, "");
4035 
4036          result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32,
4037                                      (LLVMValueRef[]){src, index_val}, 2,
4038                                      AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
4039 
4040          result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
4041 
4042          result = exit_waterfall(ctx, &wctx, result);
4043       }
4044       break;
4045    case nir_intrinsic_reduce:
4046       result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0],
4047                                instr->const_index[1]);
4048       break;
4049    case nir_intrinsic_inclusive_scan:
4050       result =
4051          ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
4052       break;
4053    case nir_intrinsic_exclusive_scan:
4054       result =
4055          ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
4056       break;
4057    case nir_intrinsic_quad_broadcast: {
4058       unsigned lane = nir_src_as_uint(instr->src[1]);
4059       result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
4060       break;
4061    }
4062    case nir_intrinsic_quad_swap_horizontal:
4063       result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
4064       break;
4065    case nir_intrinsic_quad_swap_vertical:
4066       result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
4067       break;
4068    case nir_intrinsic_quad_swap_diagonal:
4069       result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
4070       break;
4071    case nir_intrinsic_quad_swizzle_amd: {
4072       uint32_t mask = nir_intrinsic_swizzle_mask(instr);
4073       result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
4074                                      (mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
4075       break;
4076    }
4077    case nir_intrinsic_masked_swizzle_amd: {
4078       uint32_t mask = nir_intrinsic_swizzle_mask(instr);
4079       result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
4080       break;
4081    }
4082    case nir_intrinsic_write_invocation_amd:
4083       result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
4084                                   get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2]));
4085       break;
4086    case nir_intrinsic_mbcnt_amd:
4087       result = ac_build_mbcnt_add(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
4088       break;
4089    case nir_intrinsic_load_scratch: {
4090       LLVMValueRef offset = get_src(ctx, instr->src[0]);
4091       LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
4092       LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
4093       LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
4094                                 ? comp_type
4095                                 : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
4096       unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
4097       ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
4098       result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
4099       break;
4100    }
4101    case nir_intrinsic_store_scratch: {
4102       LLVMValueRef offset = get_src(ctx, instr->src[1]);
4103       LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
4104       LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
4105       unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
4106       ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(comp_type, addr_space), "");
4107       LLVMValueRef src = get_src(ctx, instr->src[0]);
4108       unsigned wrmask = nir_intrinsic_write_mask(instr);
4109       while (wrmask) {
4110          int start, count;
4111          u_bit_scan_consecutive_range(&wrmask, &start, &count);
4112 
4113          LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
4114          LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
4115          LLVMTypeRef vec_type = count == 1 ? comp_type : LLVMVectorType(comp_type, count);
4116          offset_ptr = LLVMBuildBitCast(ctx->ac.builder, offset_ptr,
4117                                        LLVMPointerType(vec_type, addr_space), "");
4118          LLVMValueRef offset_src = ac_extract_components(&ctx->ac, src, start, count);
4119          LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
4120       }
4121       break;
4122    }
4123    case nir_intrinsic_load_constant: {
4124       unsigned base = nir_intrinsic_base(instr);
4125       unsigned range = nir_intrinsic_range(instr);
4126 
4127       LLVMValueRef offset = get_src(ctx, instr->src[0]);
4128       offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, base, false), "");
4129 
4130       /* Clamp the offset to avoid out-of-bound access because global
4131        * instructions can't handle them.
4132        */
4133       LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false);
4134       LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
4135       offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, "");
4136 
4137       LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data, offset);
4138       LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
4139       LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
4140                                 ? comp_type
4141                                 : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
4142       unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
4143       ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
4144       result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
4145       break;
4146    }
4147    case nir_intrinsic_set_vertex_and_primitive_count:
4148       /* Currently ignored. */
4149       break;
4150    case nir_intrinsic_load_buffer_amd: {
4151       LLVMValueRef descriptor = get_src(ctx, instr->src[0]);
4152       LLVMValueRef addr_voffset = get_src(ctx, instr->src[1]);
4153       LLVMValueRef addr_soffset = get_src(ctx, instr->src[2]);
4154       unsigned num_components = instr->dest.ssa.num_components;
4155       unsigned const_offset = nir_intrinsic_base(instr);
4156       bool swizzled = nir_intrinsic_is_swizzled(instr);
4157       bool reorder = nir_intrinsic_can_reorder(instr);
4158       bool slc = nir_intrinsic_slc_amd(instr);
4159 
4160       enum ac_image_cache_policy cache_policy = ac_glc;
4161       if (swizzled)
4162          cache_policy |= ac_swizzled;
4163       if (slc)
4164          cache_policy |= ac_slc;
4165       if (ctx->ac.chip_class >= GFX10)
4166          cache_policy |= ac_dlc;
4167 
4168       LLVMTypeRef channel_type;
4169       if (instr->dest.ssa.bit_size == 8)
4170          channel_type = ctx->ac.i8;
4171       else if (instr->dest.ssa.bit_size == 16)
4172          channel_type = ctx->ac.i16;
4173       else if (instr->dest.ssa.bit_size == 32)
4174          channel_type = ctx->ac.i32;
4175       else if (instr->dest.ssa.bit_size == 64)
4176          channel_type = ctx->ac.i64;
4177       else if (instr->dest.ssa.bit_size == 128)
4178          channel_type = ctx->ac.i128;
4179       else
4180          unreachable("Unsupported channel type for load_buffer_amd");
4181 
4182       result = ac_build_buffer_load(&ctx->ac, descriptor, num_components, NULL,
4183                                     addr_voffset, addr_soffset, const_offset,
4184                                     channel_type, cache_policy, reorder, false);
4185       result = ac_to_integer(&ctx->ac, ac_trim_vector(&ctx->ac, result, num_components));
4186       break;
4187    }
4188    case nir_intrinsic_store_buffer_amd: {
4189       LLVMValueRef store_data = get_src(ctx, instr->src[0]);
4190       LLVMValueRef descriptor = get_src(ctx, instr->src[1]);
4191       LLVMValueRef addr_voffset = get_src(ctx, instr->src[2]);
4192       LLVMValueRef addr_soffset = get_src(ctx, instr->src[3]);
4193       unsigned const_offset = nir_intrinsic_base(instr);
4194       bool swizzled = nir_intrinsic_is_swizzled(instr);
4195       bool slc = nir_intrinsic_slc_amd(instr);
4196 
4197       enum ac_image_cache_policy cache_policy = ac_glc;
4198       if (swizzled)
4199          cache_policy |= ac_swizzled;
4200       if (slc)
4201          cache_policy |= ac_slc;
4202 
4203       ac_build_buffer_store_dword(&ctx->ac, descriptor, store_data,
4204                                   NULL, addr_voffset, addr_soffset, const_offset,
4205                                   cache_policy);
4206       break;
4207    }
4208    case nir_intrinsic_load_packed_passthrough_primitive_amd:
4209       result = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[0]);
4210       break;
4211    case nir_intrinsic_load_initial_edgeflags_amd:
4212       if (ctx->stage == MESA_SHADER_VERTEX && !ctx->info->vs.blit_sgprs_amd)
4213          result = ac_pack_edgeflags_for_export(&ctx->ac, ctx->args);
4214       else
4215          result = ctx->ac.i32_0;
4216       break;
4217    case nir_intrinsic_has_input_vertex_amd: {
4218       LLVMValueRef num =
4219          ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 0, 8);
4220       result = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num, "");
4221       break;
4222    }
4223    case nir_intrinsic_has_input_primitive_amd: {
4224       LLVMValueRef num =
4225          ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 8, 8);
4226       result = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num, "");
4227       break;
4228    }
4229    case nir_intrinsic_load_workgroup_num_input_vertices_amd:
4230       result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), 12, 9);
4231       break;
4232    case nir_intrinsic_load_workgroup_num_input_primitives_amd:
4233       result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), 22, 9);
4234       break;
4235    case nir_intrinsic_alloc_vertices_and_primitives_amd:
4236       /* The caller should only call this conditionally for wave 0, so assume that the current
4237        * wave is always wave 0.
4238        */
4239       ac_build_sendmsg_gs_alloc_req(&ctx->ac, ctx->ac.i32_0,
4240                                     get_src(ctx, instr->src[0]),
4241                                     get_src(ctx, instr->src[1]));
4242       break;
4243    case nir_intrinsic_overwrite_vs_arguments_amd:
4244       ctx->vertex_id_replaced = get_src(ctx, instr->src[0]);
4245       ctx->instance_id_replaced = get_src(ctx, instr->src[1]);
4246       break;
4247    case nir_intrinsic_overwrite_tes_arguments_amd:
4248       ctx->tes_u_replaced = get_src(ctx, instr->src[0]);
4249       ctx->tes_v_replaced = get_src(ctx, instr->src[1]);
4250       ctx->tes_rel_patch_id_replaced = get_src(ctx, instr->src[2]);
4251       ctx->tes_patch_id_replaced = get_src(ctx, instr->src[3]);
4252       break;
4253    case nir_intrinsic_export_primitive_amd: {
4254       struct ac_ngg_prim prim = {0};
4255       prim.passthrough = get_src(ctx, instr->src[0]);
4256       ac_build_export_prim(&ctx->ac, &prim);
4257       break;
4258    }
4259    case nir_intrinsic_export_vertex_amd:
4260       ctx->abi->export_vertex(ctx->abi);
4261       break;
4262    case nir_intrinsic_elect:
4263       result = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, visit_first_invocation(ctx),
4264                              ac_get_thread_id(&ctx->ac), "");
4265       break;
4266    case nir_intrinsic_byte_permute_amd:
4267       if (LLVM_VERSION_MAJOR < 13) {
4268          assert("unimplemented byte_permute, LLVM 12 doesn't have amdgcn.perm");
4269          break;
4270       }
4271       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.perm", ctx->ac.i32,
4272                                   (LLVMValueRef[]){get_src(ctx, instr->src[0]),
4273                                                    get_src(ctx, instr->src[1]),
4274                                                    get_src(ctx, instr->src[2])},
4275                                   3, AC_FUNC_ATTR_READNONE);
4276       break;
4277    case nir_intrinsic_lane_permute_16_amd:
4278       result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.permlane16", ctx->ac.i32,
4279                                   (LLVMValueRef[]){get_src(ctx, instr->src[0]),
4280                                                    get_src(ctx, instr->src[0]),
4281                                                    get_src(ctx, instr->src[1]),
4282                                                    get_src(ctx, instr->src[2]),
4283                                                    ctx->ac.i1false,
4284                                                    ctx->ac.i1false},
4285                                   6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
4286       break;
4287    case nir_intrinsic_load_force_vrs_rates_amd:
4288       result = ac_get_arg(&ctx->ac, ctx->args->force_vrs_rates);
4289       break;
4290    case nir_intrinsic_load_scalar_arg_amd:
4291    case nir_intrinsic_load_vector_arg_amd: {
4292       assert(nir_intrinsic_base(instr) < AC_MAX_ARGS);
4293       result = ac_to_integer(&ctx->ac, LLVMGetParam(ctx->main_function, nir_intrinsic_base(instr)));
4294       break;
4295    }
4296    case nir_intrinsic_load_smem_amd: {
4297       LLVMValueRef base = get_src(ctx, instr->src[0]);
4298       LLVMValueRef offset = get_src(ctx, instr->src[1]);
4299 
4300       LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
4301       LLVMTypeRef ptr_type = LLVMPointerType(result_type, AC_ADDR_SPACE_CONST);
4302       LLVMTypeRef byte_ptr_type = LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST);
4303 
4304       LLVMValueRef addr = LLVMBuildIntToPtr(ctx->ac.builder, base, byte_ptr_type, "");
4305       addr = LLVMBuildGEP(ctx->ac.builder, addr, &offset, 1, "");
4306       addr = LLVMBuildBitCast(ctx->ac.builder, addr, ptr_type, "");
4307 
4308       LLVMSetMetadata(addr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
4309       result = LLVMBuildLoad(ctx->ac.builder, addr, "");
4310       LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
4311       break;
4312    }
4313    default:
4314       fprintf(stderr, "Unknown intrinsic: ");
4315       nir_print_instr(&instr->instr, stderr);
4316       fprintf(stderr, "\n");
4317       abort();
4318       break;
4319    }
4320    if (result) {
4321       ctx->ssa_defs[instr->dest.ssa.index] = result;
4322    }
4323 }
4324 
static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx, unsigned base_index,
                                                    unsigned constant_index,
                                                    LLVMValueRef dynamic_index)
{
   LLVMBuilderRef builder = ctx->ac.builder;

   /* Combine the dynamic and constant parts of the array index. */
   LLVMValueRef idx = LLVMBuildAdd(builder, dynamic_index,
                                   LLVMConstInt(ctx->ac.i32, constant_index, 0), "");

   /* Bindless uniforms are 64-bit, so multiply the index by 8 to get a byte
    * offset, then add the byte offset of the uniform itself (4 bytes per
    * driver location slot).
    */
   idx = LLVMBuildMul(builder, idx, LLVMConstInt(ctx->ac.i32, 8, 0), "");
   LLVMValueRef byte_offset =
      LLVMBuildAdd(builder, LLVMConstInt(ctx->ac.i32, base_index * 4, 0), idx, "");

   /* The handle lives in UBO binding 0; load one 32-bit channel from it. */
   LLVMValueRef ubo = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0);
   LLVMValueRef loaded = ac_build_buffer_load(&ctx->ac, ubo, 1, NULL, byte_offset, NULL, 0,
                                              ctx->ac.f32, 0, true, true);

   return LLVMBuildBitCast(builder, loaded, ctx->ac.i32, "");
}
4344 
/* Resolved "address" of a texture/sampler/image descriptor, produced by
 * get_sampler_desc_internal() and passed to ac_shader_abi::load_sampler_desc.
 */
struct sampler_desc_address {
   unsigned descriptor_set;
   unsigned base_index; /* binding in vulkan */
   unsigned constant_index; /* constant part of the array index */
   LLVMValueRef dynamic_index; /* dynamic part of the array index, or NULL */
   bool image; /* image descriptor rather than a texture/sampler */
   bool bindless; /* dynamic_index is a bindless handle, not an array index */
};
4353 
/* Resolve where a texture/sampler/image descriptor lives.
 *
 * Two cases: with no deref chain, the resource is bindless and its handle is
 * taken directly from the instruction's sources; with a deref chain, the
 * chain is walked down to the variable to compute the descriptor set,
 * binding, and the constant/dynamic parts of any array index.
 *
 * \param deref_instr  deref chain to the resource variable, or NULL for
 *                     bindless handles given as instruction sources
 * \param instr        the owning texture instruction or image intrinsic
 * \param image        true when resolving an image (intrinsic) access
 */
static struct sampler_desc_address get_sampler_desc_internal(struct ac_nir_context *ctx,
                                                             nir_deref_instr *deref_instr,
                                                             const nir_instr *instr, bool image)
{
   LLVMValueRef index = NULL;
   unsigned constant_index = 0;
   unsigned descriptor_set;
   unsigned base_index;
   bool bindless = false;

   if (!deref_instr) {
      /* Bindless access without a deref: the handle is an SSA source. */
      descriptor_set = 0;
      if (image) {
         nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr);
         base_index = 0;
         bindless = true;
         index = get_src(ctx, img_instr->src[0]);
      } else {
         nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
         int sampSrcIdx = nir_tex_instr_src_index(tex_instr, nir_tex_src_sampler_handle);
         if (sampSrcIdx != -1) {
            base_index = 0;
            bindless = true;
            index = get_src(ctx, tex_instr->src[sampSrcIdx].src);
         } else {
            assert(tex_instr && !image);
            base_index = tex_instr->sampler_index;
         }
      }
   } else {
      /* Walk the deref chain up to the variable, accumulating constant array
       * and struct offsets in constant_index and indirect array indices in
       * index.
       */
      while (deref_instr->deref_type != nir_deref_type_var) {
         if (deref_instr->deref_type == nir_deref_type_array) {
            /* Scale each index by the array-of-arrays size of the element
             * type (treated as 1 for non-array elements).
             */
            unsigned array_size = glsl_get_aoa_size(deref_instr->type);
            if (!array_size)
               array_size = 1;

            if (nir_src_is_const(deref_instr->arr.index)) {
               constant_index += array_size * nir_src_as_uint(deref_instr->arr.index);
            } else {
               LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);

               indirect = LLVMBuildMul(ctx->ac.builder, indirect,
                                       LLVMConstInt(ctx->ac.i32, array_size, false), "");

               if (!index)
                  index = indirect;
               else
                  index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
            }

            deref_instr = nir_src_as_deref(deref_instr->parent);
         } else if (deref_instr->deref_type == nir_deref_type_struct) {
            /* NOTE: the struct location offset is looked up on the parent's
             * type, so deref_instr must be advanced first.
             */
            unsigned sidx = deref_instr->strct.index;
            deref_instr = nir_src_as_deref(deref_instr->parent);
            constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx);
         } else {
            unreachable("Unsupported deref type");
         }
      }
      descriptor_set = deref_instr->var->data.descriptor_set;

      if (deref_instr->var->data.bindless) {
         /* For now just assert on unhandled variable types */
         assert(deref_instr->var->data.mode == nir_var_uniform);

         base_index = deref_instr->var->data.driver_location;
         bindless = true;

         /* The bindless handle itself is loaded from a plain uniform. */
         index = index ? index : ctx->ac.i32_0;
         index = get_bindless_index_from_uniform(ctx, base_index, constant_index, index);
      } else
         base_index = deref_instr->var->data.binding;
   }
   return (struct sampler_desc_address){
      .descriptor_set = descriptor_set,
      .base_index = base_index,
      .constant_index = constant_index,
      .dynamic_index = index,
      .image = image,
      .bindless = bindless,
   };
}
4436 
4437 /* Extract any possibly divergent index into a separate value that can be fed
4438  * into get_sampler_desc with the same arguments. */
get_sampler_desc_index(struct ac_nir_context * ctx,nir_deref_instr * deref_instr,const nir_instr * instr,bool image)4439 static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
4440                                            const nir_instr *instr, bool image)
4441 {
4442    struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
4443    return addr.dynamic_index;
4444 }
4445 
static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
                                     enum ac_descriptor_type desc_type, const nir_instr *instr,
                                     LLVMValueRef index, bool image, bool write)
{
   /* Resolve the descriptor address, then let the driver ABI load the
    * descriptor. The dynamic index is supplied by the caller (possibly
    * wrapped in a waterfall loop) rather than taken from the address.
    */
   const struct sampler_desc_address a = get_sampler_desc_internal(ctx, deref_instr, instr, image);

   return ctx->abi->load_sampler_desc(ctx->abi, a.descriptor_set, a.base_index, a.constant_index,
                                      index, desc_type, a.image, write, a.bindless);
}
4455 
4456 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
4457  *
4458  * GFX6-GFX7:
4459  *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
4460  *   filtering manually. The driver sets img7 to a mask clearing
4461  *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
4462  *     s_and_b32 samp0, samp0, img7
4463  *
4464  * GFX8:
4465  *   The ANISO_OVERRIDE sampler field enables this fix in TA.
4466  */
static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValueRef res,
                                           LLVMValueRef samp)
{
   /* GFX8+ applies the fix in hardware (ANISO_OVERRIDE); nothing to do. */
   if (ctx->ac.chip_class >= GFX8)
      return samp;

   LLVMBuilderRef b = ctx->ac.builder;
   LLVMValueRef zero = LLVMConstInt(ctx->ac.i32, 0, 0);

   /* AND sampler dword 0 with the driver-provided mask in image dword 7,
    * which clears MAX_ANISO_RATIO when BASE_LEVEL == LAST_LEVEL.
    */
   LLVMValueRef mask = LLVMBuildExtractElement(b, res, LLVMConstInt(ctx->ac.i32, 7, 0), "");
   LLVMValueRef dword0 = LLVMBuildExtractElement(b, samp, zero, "");
   dword0 = LLVMBuildAnd(b, dword0, mask, "");
   return LLVMBuildInsertElement(b, samp, dword0, zero, "");
}
4481 
/* Fetch the texture/image resource, sampler and (when needed) FMASK
 * descriptors for a texture instruction.
 *
 * Descriptors can come from three places:
 *  - full descriptor vectors given directly as sources
 *    (nir_tex_src_{texture,sampler}_handle with vector type),
 *  - scalar dynamic handles from the same sources, resolved through
 *    ctx->abi->load_sampler_desc (with waterfall loops around non-uniform
 *    handles),
 *  - deref chains (nir_tex_src_{texture,sampler}_deref), resolved through
 *    get_sampler_desc().
 *
 * Results are written through res_ptr/samp_ptr/fmask_ptr; all three are
 * cleared first and only filled when applicable.
 */
static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
                           struct waterfall_context *wctx, LLVMValueRef *res_ptr,
                           LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
{
   LLVMValueRef texture_dynamic_handle = NULL;
   LLVMValueRef sampler_dynamic_handle = NULL;
   nir_deref_instr *texture_deref_instr = NULL;
   nir_deref_instr *sampler_deref_instr = NULL;
   int plane = -1;

   *res_ptr = NULL;
   *samp_ptr = NULL;
   *fmask_ptr = NULL;
   /* Collect the descriptor-related sources of the instruction. */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_texture_handle:
      case nir_tex_src_sampler_handle: {
         LLVMValueRef val = get_src(ctx, instr->src[i].src);
         if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind) {
            /* A vector source is the descriptor itself. */
            if (instr->src[i].src_type == nir_tex_src_texture_handle)
               *res_ptr = val;
            else
               *samp_ptr = val;
         } else {
            /* A scalar source is a dynamic handle that is resolved into a
             * descriptor further below.
             */
            if (instr->src[i].src_type == nir_tex_src_texture_handle)
               texture_dynamic_handle = val;
            else
               sampler_dynamic_handle = val;
         }
         break;
      }
      case nir_tex_src_plane:
         plane = nir_src_as_int(instr->src[i].src);
         break;
      default:
         break;
      }
   }

   if (*res_ptr) {
      /* descriptors given through nir_tex_src_{texture,sampler}_handle */
      return;
   }

   enum ac_descriptor_type main_descriptor =
      instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;

   if (plane >= 0) {
      /* Multi-plane formats use per-plane descriptors. */
      assert(instr->op != nir_texop_txf_ms && instr->op != nir_texop_samples_identical);
      assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);

      main_descriptor = AC_DESC_PLANE_0 + plane;
   }

   if (instr->op == nir_texop_fragment_mask_fetch_amd || instr->op == nir_texop_samples_identical) {
      /* The fragment mask is fetched from the compressed
       * multisampled surface.
       */
      main_descriptor = AC_DESC_FMASK;
   }

   if (texture_dynamic_handle) {
      /* descriptor handles given through nir_tex_src_{texture,sampler}_handle */
      if (instr->texture_non_uniform)
         texture_dynamic_handle = enter_waterfall(ctx, &wctx[0], texture_dynamic_handle, true);

      if (instr->sampler_non_uniform)
         sampler_dynamic_handle = enter_waterfall(ctx, &wctx[1], sampler_dynamic_handle, true);

      *res_ptr = ctx->abi->load_sampler_desc(ctx->abi, 0, 0, 0, texture_dynamic_handle,
                                             main_descriptor, false, false, true);

      if (samp_ptr)
         *samp_ptr = ctx->abi->load_sampler_desc(ctx->abi, 0, 0, 0, sampler_dynamic_handle,
                                                 AC_DESC_SAMPLER, false, false, true);
      return;
   }

   /* Deref path: compute the (possibly divergent) indices first so that
    * non-uniform ones can be wrapped in waterfall loops before loading the
    * descriptors.
    */
   LLVMValueRef texture_dynamic_index =
      get_sampler_desc_index(ctx, texture_deref_instr, &instr->instr, false);
   if (!sampler_deref_instr)
      sampler_deref_instr = texture_deref_instr;

   LLVMValueRef sampler_dynamic_index =
      get_sampler_desc_index(ctx, sampler_deref_instr, &instr->instr, false);
   if (instr->texture_non_uniform)
      texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true);

   if (instr->sampler_non_uniform)
      sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true);

   *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr,
                               texture_dynamic_index, false, false);

   if (samp_ptr) {
      *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr,
                                   sampler_dynamic_index, false, false);
      /* GFX6-7 anisotropic-filtering workaround, see sici_fix_sampler_aniso(). */
      if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
         *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
   }
   if (fmask_ptr && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_samples_identical))
      *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, &instr->instr,
                                    texture_dynamic_index, false, false);
}
4592 
/* Round an array-slice coordinate to the nearest integer: bitcast to float,
 * round, and bitcast back.
 */
static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, LLVMValueRef coord)
{
   return ac_to_integer(ctx, ac_build_round(ctx, ac_to_float(ctx, coord)));
}
4600 
visit_tex(struct ac_nir_context * ctx,nir_tex_instr * instr)4601 static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
4602 {
4603    LLVMValueRef result = NULL;
4604    struct ac_image_args args = {0};
4605    LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
4606    LLVMValueRef ddx = NULL, ddy = NULL;
4607    unsigned offset_src = 0;
4608    struct waterfall_context wctx[2] = {{{0}}};
4609 
4610    tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr);
4611 
4612    for (unsigned i = 0; i < instr->num_srcs; i++) {
4613       switch (instr->src[i].src_type) {
4614       case nir_tex_src_coord: {
4615          LLVMValueRef coord = get_src(ctx, instr->src[i].src);
4616          args.a16 = instr->src[i].src.ssa->bit_size == 16;
4617          for (unsigned chan = 0; chan < instr->coord_components; ++chan)
4618             args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
4619          break;
4620       }
4621       case nir_tex_src_projector:
4622          break;
4623       case nir_tex_src_comparator:
4624          if (instr->is_shadow) {
4625             args.compare = get_src(ctx, instr->src[i].src);
4626             args.compare = ac_to_float(&ctx->ac, args.compare);
4627             assert(instr->src[i].src.ssa->bit_size == 32);
4628          }
4629          break;
4630       case nir_tex_src_offset:
4631          args.offset = get_src(ctx, instr->src[i].src);
4632          offset_src = i;
4633          /* We pack it with bit shifts, so we need it to be 32-bit. */
4634          assert(ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.offset)) == 32);
4635          break;
4636       case nir_tex_src_bias:
4637          args.bias = get_src(ctx, instr->src[i].src);
4638          assert(ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.bias)) == 32);
4639          break;
4640       case nir_tex_src_lod:
4641          if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
4642             args.level_zero = true;
4643          else
4644             args.lod = get_src(ctx, instr->src[i].src);
4645          break;
4646       case nir_tex_src_ms_index:
4647          sample_index = get_src(ctx, instr->src[i].src);
4648          break;
4649       case nir_tex_src_ddx:
4650          ddx = get_src(ctx, instr->src[i].src);
4651          args.g16 = instr->src[i].src.ssa->bit_size == 16;
4652          break;
4653       case nir_tex_src_ddy:
4654          ddy = get_src(ctx, instr->src[i].src);
4655          assert(LLVMTypeOf(ddy) == LLVMTypeOf(ddx));
4656          break;
4657       case nir_tex_src_min_lod:
4658          args.min_lod = get_src(ctx, instr->src[i].src);
4659          break;
4660       case nir_tex_src_texture_offset:
4661       case nir_tex_src_sampler_offset:
4662       case nir_tex_src_plane:
4663       default:
4664          break;
4665       }
4666    }
4667 
4668    if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4669       result = get_buffer_size(ctx, args.resource, true);
4670       goto write_result;
4671    }
4672 
4673    if (instr->op == nir_texop_texture_samples) {
4674       LLVMValueRef res, samples, is_msaa;
4675       LLVMValueRef default_sample;
4676 
4677       res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
4678       samples =
4679          LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 3, false), "");
4680       is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 28, false), "");
4681       is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4682       is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
4683                               LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4684 
4685       samples = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 16, false), "");
4686       samples = LLVMBuildAnd(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 0xf, false), "");
4687       samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, samples, "");
4688 
4689       if (ctx->abi->robust_buffer_access) {
4690          LLVMValueRef dword1, is_null_descriptor;
4691 
4692          /* Extract the second dword of the descriptor, if it's
4693           * all zero, then it's a null descriptor.
4694           */
4695          dword1 =
4696             LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 1, false), "");
4697          is_null_descriptor = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,
4698                                             LLVMConstInt(ctx->ac.i32, 0, false), "");
4699          default_sample =
4700             LLVMBuildSelect(ctx->ac.builder, is_null_descriptor, ctx->ac.i32_0, ctx->ac.i32_1, "");
4701       } else {
4702          default_sample = ctx->ac.i32_1;
4703       }
4704 
4705       samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, default_sample, "");
4706       result = samples;
4707       goto write_result;
4708    }
4709 
4710    if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
4711       LLVMValueRef offset[3], pack;
4712       for (unsigned chan = 0; chan < 3; ++chan)
4713          offset[chan] = ctx->ac.i32_0;
4714 
4715       unsigned num_components = ac_get_llvm_num_components(args.offset);
4716       for (unsigned chan = 0; chan < num_components; chan++) {
4717          offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);
4718          offset[chan] =
4719             LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
4720          if (chan)
4721             offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
4722                                         LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
4723       }
4724       pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
4725       pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
4726       args.offset = pack;
4727    }
4728 
4729    /* Section 8.23.1 (Depth Texture Comparison Mode) of the
4730     * OpenGL 4.5 spec says:
4731     *
4732     *    "If the texture’s internal format indicates a fixed-point
4733     *     depth texture, then D_t and D_ref are clamped to the
4734     *     range [0, 1]; otherwise no clamping is performed."
4735     *
4736     * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
4737     * so the depth comparison value isn't clamped for Z16 and
4738     * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
4739     * an explicitly clamped 32-bit float format.
4740     */
4741    if (args.compare && ctx->ac.chip_class >= GFX8 && ctx->ac.chip_class <= GFX9 &&
4742        ctx->abi->clamp_shadow_reference) {
4743       LLVMValueRef upgraded, clamped;
4744 
4745       upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
4746                                          LLVMConstInt(ctx->ac.i32, 3, false), "");
4747       upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, LLVMConstInt(ctx->ac.i32, 29, false), "");
4748       upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, "");
4749       clamped = ac_build_clamp(&ctx->ac, args.compare);
4750       args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, args.compare, "");
4751    }
4752 
4753    /* pack derivatives */
4754    if (ddx || ddy) {
4755       int num_src_deriv_channels, num_dest_deriv_channels;
4756       switch (instr->sampler_dim) {
4757       case GLSL_SAMPLER_DIM_3D:
4758       case GLSL_SAMPLER_DIM_CUBE:
4759          num_src_deriv_channels = 3;
4760          num_dest_deriv_channels = 3;
4761          break;
4762       case GLSL_SAMPLER_DIM_2D:
4763       default:
4764          num_src_deriv_channels = 2;
4765          num_dest_deriv_channels = 2;
4766          break;
4767       case GLSL_SAMPLER_DIM_1D:
4768          num_src_deriv_channels = 1;
4769          if (ctx->ac.chip_class == GFX9) {
4770             num_dest_deriv_channels = 2;
4771          } else {
4772             num_dest_deriv_channels = 1;
4773          }
4774          break;
4775       }
4776 
4777       for (unsigned i = 0; i < num_src_deriv_channels; i++) {
4778          args.derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
4779          args.derivs[num_dest_deriv_channels + i] =
4780             ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
4781       }
4782       for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
4783          LLVMValueRef zero = args.g16 ? ctx->ac.f16_0 : ctx->ac.f32_0;
4784          args.derivs[i] = zero;
4785          args.derivs[num_dest_deriv_channels + i] = zero;
4786       }
4787    }
4788 
4789    if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
4790       for (unsigned chan = 0; chan < instr->coord_components; chan++)
4791          args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
4792       if (instr->coord_components == 3)
4793          args.coords[3] = LLVMGetUndef(args.a16 ? ctx->ac.f16 : ctx->ac.f32);
4794       ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array,
4795                              instr->op == nir_texop_lod, args.coords, args.derivs);
4796    }
4797 
4798    /* Texture coordinates fixups */
4799    if (instr->coord_components > 1 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
4800        instr->is_array && instr->op != nir_texop_txf) {
4801       args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]);
4802    }
4803 
4804    if (instr->coord_components > 2 &&
4805        (instr->sampler_dim == GLSL_SAMPLER_DIM_2D || instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
4806         instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
4807         instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
4808        instr->is_array && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms &&
4809        instr->op != nir_texop_fragment_fetch_amd && instr->op != nir_texop_fragment_mask_fetch_amd) {
4810       args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
4811    }
4812 
4813    if (ctx->ac.chip_class == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
4814        instr->op != nir_texop_lod) {
4815       LLVMValueRef filler;
4816       if (instr->op == nir_texop_txf)
4817          filler = args.a16 ? ctx->ac.i16_0 : ctx->ac.i32_0;
4818       else
4819          filler = LLVMConstReal(args.a16 ? ctx->ac.f16 : ctx->ac.f32, 0.5);
4820 
4821       if (instr->is_array)
4822          args.coords[2] = args.coords[1];
4823       args.coords[1] = filler;
4824    }
4825 
4826    /* Pack sample index */
4827    if (sample_index && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_fragment_fetch_amd))
4828       args.coords[instr->coord_components] = sample_index;
4829 
4830    if (instr->op == nir_texop_samples_identical) {
4831       struct ac_image_args txf_args = {0};
4832       memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));
4833 
4834       txf_args.dmask = 0xf;
4835       txf_args.resource = args.resource;
4836       txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;
4837       result = build_tex_intrinsic(ctx, instr, &txf_args);
4838 
4839       result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
4840       result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
4841       goto write_result;
4842    }
4843 
4844    if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ||
4845         instr->sampler_dim == GLSL_SAMPLER_DIM_MS) &&
4846        instr->op != nir_texop_txs && instr->op != nir_texop_fragment_fetch_amd &&
4847        instr->op != nir_texop_fragment_mask_fetch_amd) {
4848       unsigned sample_chan = instr->is_array ? 3 : 2;
4849       args.coords[sample_chan] = adjust_sample_index_using_fmask(
4850          &ctx->ac, args.coords[0], args.coords[1], instr->is_array ? args.coords[2] : NULL,
4851          args.coords[sample_chan], fmask_ptr);
4852    }
4853 
4854    if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) {
4855       int num_offsets = instr->src[offset_src].src.ssa->num_components;
4856       num_offsets = MIN2(num_offsets, instr->coord_components);
4857       for (unsigned i = 0; i < num_offsets; ++i) {
4858          LLVMValueRef off = ac_llvm_extract_elem(&ctx->ac, args.offset, i);
4859          if (args.a16)
4860             off = LLVMBuildTrunc(ctx->ac.builder, off, ctx->ac.i16, "");
4861          args.coords[i] = LLVMBuildAdd(ctx->ac.builder, args.coords[i], off, "");
4862       }
4863       args.offset = NULL;
4864    }
4865 
4866    /* DMASK was repurposed for GATHER4. 4 components are always
4867     * returned and DMASK works like a swizzle - it selects
4868     * the component to fetch. The only valid DMASK values are
4869     * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4870     * (red,red,red,red) etc.) The ISA document doesn't mention
4871     * this.
4872     */
4873    args.dmask = 0xf;
4874    if (instr->op == nir_texop_tg4) {
4875       if (instr->is_shadow)
4876          args.dmask = 1;
4877       else
4878          args.dmask = 1 << instr->component;
4879    }
4880 
4881    if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) {
4882       args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array);
4883       args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
4884    }
4885 
4886    /* Adjust the number of coordinates because we only need (x,y) for 2D
4887     * multisampled images and (x,y,layer) for 2D multisampled layered
4888     * images or for multisampled input attachments.
4889     */
4890    if (instr->op == nir_texop_fragment_mask_fetch_amd) {
4891       if (args.dim == ac_image_2dmsaa) {
4892          args.dim = ac_image_2d;
4893       } else {
4894          assert(args.dim == ac_image_2darraymsaa);
4895          args.dim = ac_image_2darray;
4896       }
4897    }
4898 
4899    /* Set TRUNC_COORD=0 for textureGather(). */
4900    if (instr->op == nir_texop_tg4) {
4901       LLVMValueRef dword0 = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, ctx->ac.i32_0, "");
4902       dword0 = LLVMBuildAnd(ctx->ac.builder, dword0, LLVMConstInt(ctx->ac.i32, C_008F30_TRUNC_COORD, 0), "");
4903       args.sampler = LLVMBuildInsertElement(ctx->ac.builder, args.sampler, dword0, ctx->ac.i32_0, "");
4904    }
4905 
4906    assert(instr->dest.is_ssa);
4907    args.d16 = instr->dest.ssa.bit_size == 16;
4908    args.tfe = instr->is_sparse;
4909 
4910    result = build_tex_intrinsic(ctx, instr, &args);
4911 
4912    LLVMValueRef code = NULL;
4913    if (instr->is_sparse) {
4914       code = ac_llvm_extract_elem(&ctx->ac, result, 4);
4915       result = ac_trim_vector(&ctx->ac, result, 4);
4916    }
4917 
4918    if (instr->op == nir_texop_query_levels)
4919       result =
4920          LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
4921    else if (instr->is_shadow && instr->is_new_style_shadow && instr->op != nir_texop_txs &&
4922             instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
4923       result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
4924    else if (ctx->ac.chip_class == GFX9 && instr->op == nir_texop_txs &&
4925               instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array) {
4926       LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
4927       LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
4928       result = LLVMBuildInsertElement(ctx->ac.builder, result, layers, ctx->ac.i32_1, "");
4929    } else if (instr->op == nir_texop_fragment_mask_fetch_amd) {
4930       /* Use 0x76543210 if the image doesn't have FMASK. */
4931       LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
4932       tmp = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
4933       tmp = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, tmp, ctx->ac.i32_0, "");
4934       result = LLVMBuildSelect(ctx->ac.builder, tmp,
4935                                LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""),
4936                                LLVMConstInt(ctx->ac.i32, 0x76543210, false), "");
4937    } else if (nir_tex_instr_result_size(instr) != 4)
4938       result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
4939 
4940    if (instr->is_sparse)
4941       result = ac_build_concat(&ctx->ac, result, code);
4942 
4943 write_result:
4944    if (result) {
4945       assert(instr->dest.is_ssa);
4946       result = ac_to_integer(&ctx->ac, result);
4947 
4948       for (int i = ARRAY_SIZE(wctx); --i >= 0;) {
4949          result = exit_waterfall(ctx, wctx + i, result);
4950       }
4951 
4952       ctx->ssa_defs[instr->dest.ssa.index] = result;
4953    }
4954 }
4955 
/* Create an (empty) LLVM phi for a NIR phi and remember it in ctx->phis so
 * phi_post_pass() can attach its incoming edges once all predecessor blocks
 * have been emitted.
 */
static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
{
   LLVMTypeRef phi_type = get_def_type(ctx, &instr->dest.ssa);
   LLVMValueRef phi = LLVMBuildPhi(ctx->ac.builder, phi_type, "");

   _mesa_hash_table_insert(ctx->phis, instr, phi);
   ctx->ssa_defs[instr->dest.ssa.index] = phi;
}
4964 
/* Wire up one phi: add an (incoming value, predecessor block) pair for
 * every NIR phi source. Runs after all blocks exist, so get_block() and
 * get_src() are guaranteed to resolve.
 */
static void visit_post_phi(struct ac_nir_context *ctx, nir_phi_instr *instr, LLVMValueRef llvm_phi)
{
   nir_foreach_phi_src (src, instr) {
      LLVMValueRef incoming_value = get_src(ctx, src->src);
      LLVMBasicBlockRef pred_block = get_block(ctx, src->pred);

      LLVMAddIncoming(llvm_phi, &incoming_value, &pred_block, 1);
   }
}
4974 
phi_post_pass(struct ac_nir_context * ctx)4975 static void phi_post_pass(struct ac_nir_context *ctx)
4976 {
4977    hash_table_foreach(ctx->phis, entry)
4978    {
4979       visit_post_phi(ctx, (nir_phi_instr *)entry->key, (LLVMValueRef)entry->data);
4980    }
4981 }
4982 
is_def_used_in_an_export(const nir_ssa_def * def)4983 static bool is_def_used_in_an_export(const nir_ssa_def *def)
4984 {
4985    nir_foreach_use (use_src, def) {
4986       if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
4987          nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
4988          if (instr->intrinsic == nir_intrinsic_store_deref)
4989             return true;
4990       } else if (use_src->parent_instr->type == nir_instr_type_alu) {
4991          nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
4992          if (instr->op == nir_op_vec4 && is_def_used_in_an_export(&instr->dest.dest.ssa)) {
4993             return true;
4994          }
4995       }
4996    }
4997    return false;
4998 }
4999 
visit_ssa_undef(struct ac_nir_context * ctx,const nir_ssa_undef_instr * instr)5000 static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_ssa_undef_instr *instr)
5001 {
5002    unsigned num_components = instr->def.num_components;
5003    LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
5004 
5005    if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {
5006       LLVMValueRef undef;
5007 
5008       if (num_components == 1)
5009          undef = LLVMGetUndef(type);
5010       else {
5011          undef = LLVMGetUndef(LLVMVectorType(type, num_components));
5012       }
5013       ctx->ssa_defs[instr->def.index] = undef;
5014    } else {
5015       LLVMValueRef zero = LLVMConstInt(type, 0, false);
5016       if (num_components > 1) {
5017          zero = ac_build_gather_values_extended(&ctx->ac, &zero, 4, 0, false, false);
5018       }
5019       ctx->ssa_defs[instr->def.index] = zero;
5020    }
5021 }
5022 
visit_jump(struct ac_llvm_context * ctx,const nir_jump_instr * instr)5023 static void visit_jump(struct ac_llvm_context *ctx, const nir_jump_instr *instr)
5024 {
5025    switch (instr->type) {
5026    case nir_jump_break:
5027       ac_build_break(ctx);
5028       break;
5029    case nir_jump_continue:
5030       ac_build_continue(ctx);
5031       break;
5032    default:
5033       fprintf(stderr, "Unknown NIR jump instr: ");
5034       nir_print_instr(&instr->instr, stderr);
5035       fprintf(stderr, "\n");
5036       abort();
5037    }
5038 }
5039 
glsl_base_to_llvm_type(struct ac_llvm_context * ac,enum glsl_base_type type)5040 static LLVMTypeRef glsl_base_to_llvm_type(struct ac_llvm_context *ac, enum glsl_base_type type)
5041 {
5042    switch (type) {
5043    case GLSL_TYPE_INT:
5044    case GLSL_TYPE_UINT:
5045    case GLSL_TYPE_BOOL:
5046    case GLSL_TYPE_SUBROUTINE:
5047       return ac->i32;
5048    case GLSL_TYPE_INT8:
5049    case GLSL_TYPE_UINT8:
5050       return ac->i8;
5051    case GLSL_TYPE_INT16:
5052    case GLSL_TYPE_UINT16:
5053       return ac->i16;
5054    case GLSL_TYPE_FLOAT:
5055       return ac->f32;
5056    case GLSL_TYPE_FLOAT16:
5057       return ac->f16;
5058    case GLSL_TYPE_INT64:
5059    case GLSL_TYPE_UINT64:
5060       return ac->i64;
5061    case GLSL_TYPE_DOUBLE:
5062       return ac->f64;
5063    default:
5064       unreachable("unknown GLSL type");
5065    }
5066 }
5067 
glsl_to_llvm_type(struct ac_llvm_context * ac,const struct glsl_type * type)5068 static LLVMTypeRef glsl_to_llvm_type(struct ac_llvm_context *ac, const struct glsl_type *type)
5069 {
5070    if (glsl_type_is_scalar(type)) {
5071       return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
5072    }
5073 
5074    if (glsl_type_is_vector(type)) {
5075       return LLVMVectorType(glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
5076                             glsl_get_vector_elements(type));
5077    }
5078 
5079    if (glsl_type_is_matrix(type)) {
5080       return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_column_type(type)),
5081                            glsl_get_matrix_columns(type));
5082    }
5083 
5084    if (glsl_type_is_array(type)) {
5085       return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_array_element(type)),
5086                            glsl_get_length(type));
5087    }
5088 
5089    assert(glsl_type_is_struct_or_ifc(type));
5090 
5091    LLVMTypeRef *const member_types = alloca(glsl_get_length(type) * sizeof(LLVMTypeRef));
5092 
5093    for (unsigned i = 0; i < glsl_get_length(type); i++) {
5094       member_types[i] = glsl_to_llvm_type(ac, glsl_get_struct_field(type, i));
5095    }
5096 
5097    return LLVMStructTypeInContext(ac->context, member_types, glsl_get_length(type), false);
5098 }
5099 
/* Translate a NIR deref instruction into an LLVM pointer value. Only
 * shared (LDS) and global memory derefs are handled here; derefs in other
 * modes are left for the intrinsic visitors and produce no SSA def.
 */
static void visit_deref(struct ac_nir_context *ctx, nir_deref_instr *instr)
{
   if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared | nir_var_mem_global))
      return;

   LLVMValueRef result = NULL;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      /* Variables were registered in ctx->vars up front; look up the alloca/global. */
      struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
      result = entry->data;
      break;
   }
   case nir_deref_type_struct:
      if (nir_deref_mode_is(instr, nir_var_mem_global)) {
         /* Global memory uses the shader's explicit byte offsets, so index
          * by byte offset rather than by LLVM struct field. */
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index);
         result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
                                   LLVMConstInt(ctx->ac.i32, offset, 0));
      } else {
         /* Shared memory keeps LLVM struct layout; GEP by field index. */
         result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
                                LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
      }
      break;
   case nir_deref_type_array:
      if (nir_deref_mode_is(instr, nir_var_mem_global)) {
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         unsigned stride = glsl_get_explicit_stride(parent->type);

         /* Row-major matrices and vectors may lack an explicit stride;
          * fall back to the scalar size. */
         if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) ||
             (glsl_type_is_vector(parent->type) && stride == 0))
            stride = type_scalar_size_bytes(parent->type);

         assert(stride > 0);
         /* Widen the index to 64 bits before computing the byte offset. */
         LLVMValueRef index = get_src(ctx, instr->arr.index);
         if (LLVMTypeOf(index) != ctx->ac.i64)
            index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");

         LLVMValueRef offset =
            LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");

         result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
      } else {
         result =
            ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      }
      break;
   case nir_deref_type_ptr_as_array:
      if (nir_deref_mode_is(instr, nir_var_mem_global)) {
         /* Stride comes from the deref itself for pointer arithmetic. */
         unsigned stride = nir_deref_instr_array_stride(instr);

         LLVMValueRef index = get_src(ctx, instr->arr.index);
         if (LLVMTypeOf(index) != ctx->ac.i64)
            index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");

         LLVMValueRef offset =
            LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");

         result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
      } else {
         result =
            ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      }
      break;
   case nir_deref_type_cast: {
      result = get_src(ctx, instr->parent);

      /* We can't use the structs from LLVM because the shader
       * specifies its own offsets. */
      LLVMTypeRef pointee_type = ctx->ac.i8;
      if (nir_deref_mode_is(instr, nir_var_mem_shared))
         pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);

      unsigned address_space;

      switch (instr->modes) {
      case nir_var_mem_shared:
         address_space = AC_ADDR_SPACE_LDS;
         break;
      case nir_var_mem_global:
         address_space = AC_ADDR_SPACE_GLOBAL;
         break;
      default:
         unreachable("Unhandled address space");
      }

      LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);

      /* Coerce the parent value to the expected pointer type: vectors are
       * bitcast, plain integers go through inttoptr. */
      if (LLVMTypeOf(result) != type) {
         if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
            result = LLVMBuildBitCast(ctx->ac.builder, result, type, "");
         } else {
            result = LLVMBuildIntToPtr(ctx->ac.builder, result, type, "");
         }
      }
      break;
   }
   default:
      unreachable("Unhandled deref_instr deref type");
   }

   ctx->ssa_defs[instr->dest.ssa.index] = result;
}
5202 
5203 static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list);
5204 
/* Translate one NIR block. Phis are created first, positioned at the top
 * of the LLVM block (before any instructions ac_branch_exited() may have
 * inserted), then the remaining instructions are emitted in order. The
 * final insert block is recorded in ctx->defs for later phi wiring.
 */
static void visit_block(struct ac_nir_context *ctx, nir_block *block)
{
   LLVMBasicBlockRef blockref = LLVMGetInsertBlock(ctx->ac.builder);
   LLVMValueRef first = LLVMGetFirstInstruction(blockref);
   if (first) {
      /* ac_branch_exited() might have already inserted non-phis */
      LLVMPositionBuilderBefore(ctx->ac.builder, LLVMGetFirstInstruction(blockref));
   }

   /* Phis must lead the block; NIR guarantees they come first, so stop at
    * the first non-phi. */
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_phi)
         break;
      visit_phi(ctx, nir_instr_as_phi(instr));
   }

   /* Resume emitting at the end of the block for the remaining instructions. */
   LLVMPositionBuilderAtEnd(ctx->ac.builder, blockref);

   nir_foreach_instr (instr, block) {
      switch (instr->type) {
      case nir_instr_type_alu:
         visit_alu(ctx, nir_instr_as_alu(instr));
         break;
      case nir_instr_type_load_const:
         visit_load_const(ctx, nir_instr_as_load_const(instr));
         break;
      case nir_instr_type_intrinsic:
         visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
         break;
      case nir_instr_type_tex:
         visit_tex(ctx, nir_instr_as_tex(instr));
         break;
      case nir_instr_type_phi:
         /* Already handled above. */
         break;
      case nir_instr_type_ssa_undef:
         visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
         break;
      case nir_instr_type_jump:
         visit_jump(&ctx->ac, nir_instr_as_jump(instr));
         break;
      case nir_instr_type_deref:
         visit_deref(ctx, nir_instr_as_deref(instr));
         break;
      default:
         fprintf(stderr, "Unknown NIR instr type: ");
         nir_print_instr(instr, stderr);
         fprintf(stderr, "\n");
         abort();
      }
   }

   /* Record the LLVM block the builder ended in (control flow inside the
    * block may have moved it) so phi sources can find their predecessor. */
   _mesa_hash_table_insert(ctx->defs, block, LLVMGetInsertBlock(ctx->ac.builder));
}
5257 
/* Translate a NIR if. The then-block's index serves as the flow-control
 * label for the whole construct, including the endif.
 */
static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
{
   LLVMValueRef cond = get_src(ctx, if_stmt->condition);
   nir_block *then_block = (nir_block *)exec_list_get_head(&if_stmt->then_list);

   ac_build_ifcc(&ctx->ac, cond, then_block->index);
   visit_cf_list(ctx, &if_stmt->then_list);

   if (!exec_list_is_empty(&if_stmt->else_list)) {
      nir_block *else_block = (nir_block *)exec_list_get_head(&if_stmt->else_list);

      ac_build_else(&ctx->ac, else_block->index);
      visit_cf_list(ctx, &if_stmt->else_list);
   }

   ac_build_endif(&ctx->ac, then_block->index);
}
5277 
/* Translate a NIR loop, using the first body block's index as the
 * begin/end flow-control label.
 */
static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
{
   nir_block *body_head = (nir_block *)exec_list_get_head(&loop->body);

   ac_build_bgnloop(&ctx->ac, body_head->index);
   visit_cf_list(ctx, &loop->body);
   ac_build_endloop(&ctx->ac, body_head->index);
}
5288 
visit_cf_list(struct ac_nir_context * ctx,struct exec_list * list)5289 static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list)
5290 {
5291    foreach_list_typed(nir_cf_node, node, node, list)
5292    {
5293       switch (node->type) {
5294       case nir_cf_node_block:
5295          visit_block(ctx, nir_cf_node_as_block(node));
5296          break;
5297 
5298       case nir_cf_node_if:
5299          visit_if(ctx, nir_cf_node_as_if(node));
5300          break;
5301 
5302       case nir_cf_node_loop:
5303          visit_loop(ctx, nir_cf_node_as_loop(node));
5304          break;
5305 
5306       default:
5307          assert(0);
5308       }
5309    }
5310 }
5311 
/* Allocate per-channel output allocas for one shader output variable and
 * record them in abi->outputs, indexed SoA-style by (slot, channel).
 * 16-bit outputs get f16 storage, everything else f32.
 */
void ac_handle_shader_output_decl(struct ac_llvm_context *ctx, struct ac_shader_abi *abi,
                                  struct nir_shader *nir, struct nir_variable *variable,
                                  gl_shader_stage stage)
{
   unsigned output_loc = variable->data.driver_location;
   unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);

   /* Tess ctrl has its own load/store paths for outputs. */
   if (stage == MESA_SHADER_TESS_CTRL)
      return;

   if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL ||
       stage == MESA_SHADER_GEOMETRY) {
      int idx = variable->data.location + variable->data.index;
      if (idx == VARYING_SLOT_CLIP_DIST0) {
         /* Combined clip+cull distances occupy one vec4 slot, or two when
          * there are more than four distances. */
         int length = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size;
         attrib_count = length > 4 ? 2 : 1;
      }
   }

   bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
   LLVMTypeRef chan_type = is_16bit ? ctx->f16 : ctx->f32;
   for (unsigned slot = 0; slot < attrib_count; ++slot) {
      for (unsigned chan = 0; chan < 4; chan++) {
         abi->outputs[ac_llvm_reg_index_soa(output_loc + slot, chan)] =
            ac_build_alloca_undef(ctx, chan_type, "");
      }
   }
}
5345 
setup_scratch(struct ac_nir_context * ctx,struct nir_shader * shader)5346 static void setup_scratch(struct ac_nir_context *ctx, struct nir_shader *shader)
5347 {
5348    if (shader->scratch_size == 0)
5349       return;
5350 
5351    ctx->scratch =
5352       ac_build_alloca_undef(&ctx->ac, LLVMArrayType(ctx->ac.i8, shader->scratch_size), "scratch");
5353 }
5354 
setup_constant_data(struct ac_nir_context * ctx,struct nir_shader * shader)5355 static void setup_constant_data(struct ac_nir_context *ctx, struct nir_shader *shader)
5356 {
5357    if (!shader->constant_data)
5358       return;
5359 
5360    LLVMValueRef data = LLVMConstStringInContext(ctx->ac.context, shader->constant_data,
5361                                                 shader->constant_data_size, true);
5362    LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, shader->constant_data_size);
5363    LLVMValueRef global =
5364       LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "const_data", AC_ADDR_SPACE_CONST);
5365 
5366    LLVMSetInitializer(global, data);
5367    LLVMSetGlobalConstant(global, true);
5368    LLVMSetVisibility(global, LLVMHiddenVisibility);
5369    ctx->constant_data = global;
5370 }
5371 
setup_shared(struct ac_nir_context * ctx,struct nir_shader * nir)5372 static void setup_shared(struct ac_nir_context *ctx, struct nir_shader *nir)
5373 {
5374    if (ctx->ac.lds)
5375       return;
5376 
5377    LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.shared_size);
5378 
5379    LLVMValueRef lds =
5380       LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "compute_lds", AC_ADDR_SPACE_LDS);
5381    LLVMSetAlignment(lds, 64 * 1024);
5382 
5383    ctx->ac.lds =
5384       LLVMBuildBitCast(ctx->ac.builder, lds, LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS), "");
5385 }
5386 
/* Top-level entry point: translate a whole NIR shader into LLVM IR using
 * the given LLVM context, shader ABI callbacks, and argument layout. Sets
 * up per-translation state (SSA def table, hash tables, scratch/constant/
 * LDS storage), walks the main function's CF list, wires up phis, and
 * finally emits outputs and frees the temporary state.
 */
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
                      const struct ac_shader_args *args, struct nir_shader *nir)
{
   struct ac_nir_context ctx = {0};
   struct nir_function *func;

   /* Work on a local copy of the LLVM context; mutations (e.g. lds,
    * postponed_kill) stay local to this translation. */
   ctx.ac = *ac;
   ctx.abi = abi;
   ctx.args = args;

   ctx.stage = nir->info.stage;
   ctx.info = &nir->info;

   ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));

   /* TODO: remove this after RADV switches to lowered IO */
   if (!nir->info.io_lowered) {
      nir_foreach_shader_out_variable(variable, nir)
      {
         ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable, ctx.stage);
      }
   }

   ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);

   /* Only needed when fragment-shader interpolation results must be
    * checked for infinity. */
   if (ctx.abi->kill_ps_if_inf_interp)
      ctx.verified_interp =
         _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);

   func = (struct nir_function *)exec_list_get_head(&nir->functions);

   /* Re-index SSA defs so ssa_defs can be a flat array indexed by def. */
   nir_index_ssa_defs(func->impl);
   ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));

   setup_scratch(&ctx, nir);
   setup_constant_data(&ctx, nir);

   if (gl_shader_stage_is_compute(nir->info.stage))
      setup_shared(&ctx, nir);

   if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_demote &&
       LLVM_VERSION_MAJOR < 13) {
      /* true = don't kill. */
      ctx.ac.postponed_kill = ac_build_alloca_init(&ctx.ac, ctx.ac.i1true, "");
   }

   visit_cf_list(&ctx, &func->impl->body);
   /* All blocks now exist; attach the phi incoming edges. */
   phi_post_pass(&ctx);

   /* Perform any kill that was deferred during demote handling. */
   if (ctx.ac.postponed_kill)
      ac_build_kill_if_false(&ctx.ac, LLVMBuildLoad(ctx.ac.builder, ctx.ac.postponed_kill, ""));

   if (!gl_shader_stage_is_compute(nir->info.stage))
      ctx.abi->emit_outputs(ctx.abi);

   free(ctx.ssa_defs);
   ralloc_free(ctx.defs);
   ralloc_free(ctx.phis);
   ralloc_free(ctx.vars);
   if (ctx.abi->kill_ps_if_inf_interp)
      ralloc_free(ctx.verified_interp);
}
5451