1 /*
2  * Copyright 2016 Bas Nieuwenhuizen
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18  * USE OR OTHER DEALINGS IN THE SOFTWARE.
19  *
20  * The above copyright notice and this permission notice (including the
21  * next paragraph) shall be included in all copies or substantial portions
22  * of the Software.
23  *
24  */
25 #ifndef AC_LLVM_BUILD_H
26 #define AC_LLVM_BUILD_H
27 
28 #include <stdbool.h>
29 #include <llvm-c/Core.h>
30 #include "compiler/nir/nir.h"
31 #include "amd_family.h"
32 #include "ac_shader_util.h"
33 #include "ac_shader_args.h"
34 #include "ac_shader_abi.h"
35 
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39 
/* Address-space identifiers. */
enum {
	/* Flat addressing; slower than global. */
	AC_ADDR_SPACE_FLAT        = 0,
	AC_ADDR_SPACE_GLOBAL      = 1,
	AC_ADDR_SPACE_GDS         = 2,
	AC_ADDR_SPACE_LDS         = 3,
	/* Global, allowing SMEM. */
	AC_ADDR_SPACE_CONST       = 4,
	/* Same as CONST, but the pointer type has 32 bits. */
	AC_ADDR_SPACE_CONST_32BIT = 6,
};
48 
/* Wait flags; distinct bits so they can be OR-ed together
 * (presumably the wait_flags argument of ac_build_waitcnt - confirm). */
#define AC_WAIT_LGKM	0x1 /* bit 0: LDS, GDS, constant, message */
#define AC_WAIT_VLOAD	0x2 /* bit 1: VMEM load/sample instructions */
#define AC_WAIT_VSTORE	0x4 /* bit 2: VMEM store instructions */
52 
53 struct ac_llvm_flow;
54 struct ac_llvm_compiler;
55 enum ac_float_mode;
56 
/*
 * Control-flow stack bookkeeping. struct ac_llvm_context refers to one of
 * these through its `flow` pointer.
 */
struct ac_llvm_flow_state {
	/* Stack entries; struct ac_llvm_flow is only forward-declared here. */
	struct ac_llvm_flow *stack;
	/* NOTE(review): presumably the allocated capacity of `stack` - confirm. */
	unsigned depth_max;
	/* NOTE(review): presumably the number of entries in use - confirm. */
	unsigned depth;
};
62 
63 struct ac_llvm_context {
64 	LLVMContextRef context;
65 	LLVMModuleRef module;
66 	LLVMBuilderRef builder;
67 
68 	LLVMValueRef main_function;
69 
70 	LLVMTypeRef voidt;
71 	LLVMTypeRef i1;
72 	LLVMTypeRef i8;
73 	LLVMTypeRef i16;
74 	LLVMTypeRef i32;
75 	LLVMTypeRef i64;
76 	LLVMTypeRef i128;
77 	LLVMTypeRef intptr;
78 	LLVMTypeRef f16;
79 	LLVMTypeRef f32;
80 	LLVMTypeRef f64;
81 	LLVMTypeRef v2i16;
82 	LLVMTypeRef v4i16;
83 	LLVMTypeRef v2f16;
84 	LLVMTypeRef v4f16;
85 	LLVMTypeRef v2i32;
86 	LLVMTypeRef v3i32;
87 	LLVMTypeRef v4i32;
88 	LLVMTypeRef v2f32;
89 	LLVMTypeRef v3f32;
90 	LLVMTypeRef v4f32;
91 	LLVMTypeRef v8i32;
92 	LLVMTypeRef iN_wavemask;
93 	LLVMTypeRef iN_ballotmask;
94 
95 	LLVMValueRef i8_0;
96 	LLVMValueRef i8_1;
97 	LLVMValueRef i16_0;
98 	LLVMValueRef i16_1;
99 	LLVMValueRef i32_0;
100 	LLVMValueRef i32_1;
101 	LLVMValueRef i64_0;
102 	LLVMValueRef i64_1;
103 	LLVMValueRef i128_0;
104 	LLVMValueRef i128_1;
105 	LLVMValueRef f16_0;
106 	LLVMValueRef f16_1;
107 	LLVMValueRef f32_0;
108 	LLVMValueRef f32_1;
109 	LLVMValueRef f64_0;
110 	LLVMValueRef f64_1;
111 	LLVMValueRef i1true;
112 	LLVMValueRef i1false;
113 
114 	/* Temporary helper to implement demote_to_helper:
115          * True = live lanes
116          * False = demoted lanes
117          */
118 	LLVMValueRef postponed_kill;
119 
120 	/* Since ac_nir_translate makes a local copy of ac_llvm_context, there
121 	 * are two ac_llvm_contexts. Declare a pointer here, so that the control
122 	 * flow stack is shared by both ac_llvm_contexts.
123 	 */
124 	struct ac_llvm_flow_state *flow;
125 
126 	unsigned range_md_kind;
127 	unsigned invariant_load_md_kind;
128 	unsigned uniform_md_kind;
129 	LLVMValueRef empty_md;
130 
131 	enum chip_class chip_class;
132 	enum radeon_family family;
133 
134 	unsigned wave_size;
135 	unsigned ballot_mask_bits;
136 
137 	unsigned float_mode;
138 
139 	LLVMValueRef lds;
140 };
141 
142 void
143 ac_llvm_context_init(struct ac_llvm_context *ctx,
144 		     struct ac_llvm_compiler *compiler,
145 		     enum chip_class chip_class, enum radeon_family family,
146 		     enum ac_float_mode float_mode, unsigned wave_size,
147 		     unsigned ballot_mask_bits);
148 
149 void
150 ac_llvm_context_dispose(struct ac_llvm_context *ctx);
151 
152 int
153 ac_get_llvm_num_components(LLVMValueRef value);
154 
155 int
156 ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type);
157 
158 LLVMValueRef
159 ac_llvm_extract_elem(struct ac_llvm_context *ac,
160 		     LLVMValueRef value,
161 		     int index);
162 
163 unsigned ac_get_type_size(LLVMTypeRef type);
164 
165 LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
166 LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
167 LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
168 LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
169 LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
170 
171 LLVMValueRef
172 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
173 		   LLVMTypeRef return_type, LLVMValueRef *params,
174 		   unsigned param_count, unsigned attrib_mask);
175 
176 void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize);
177 
178 LLVMValueRef
179 ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
180 	     unsigned count_incoming, LLVMValueRef *values,
181 	     LLVMBasicBlockRef *blocks);
182 
183 void ac_build_s_barrier(struct ac_llvm_context *ctx);
184 void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
185 				   LLVMValueRef *pvgpr);
186 
187 LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx,
188 				   nir_scope scope);
189 
190 LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
191 LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
192 				 LLVMValueRef value);
193 
194 LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value);
195 
196 LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value);
197 
198 LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value);
199 
200 LLVMValueRef
201 ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
202 			       unsigned value_count, unsigned component);
203 
204 LLVMValueRef
205 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
206 				LLVMValueRef *values,
207 				unsigned value_count,
208 				unsigned value_stride,
209 				bool load,
210 				bool always_vector);
211 LLVMValueRef
212 ac_build_gather_values(struct ac_llvm_context *ctx,
213 		       LLVMValueRef *values,
214 		       unsigned value_count);
215 
216 LLVMValueRef
217 ac_extract_components(struct ac_llvm_context *ctx,
218 		      LLVMValueRef value,
219 		      unsigned start,
220 		      unsigned channels);
221 
222 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
223 				     LLVMValueRef value,
224 				     unsigned num_channels);
225 LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
226 
227 LLVMValueRef
228 ac_build_fdiv(struct ac_llvm_context *ctx,
229 	      LLVMValueRef num,
230 	      LLVMValueRef den);
231 
232 LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
233 				LLVMValueRef num,
234 				LLVMValueRef multiplier,
235 				LLVMValueRef pre_shift,
236 				LLVMValueRef post_shift,
237 				LLVMValueRef increment);
238 LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
239 				    LLVMValueRef num,
240 				    LLVMValueRef multiplier,
241 				    LLVMValueRef pre_shift,
242 				    LLVMValueRef post_shift,
243 				    LLVMValueRef increment);
244 LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
245 					      LLVMValueRef num,
246 					      LLVMValueRef multiplier,
247 					      LLVMValueRef post_shift);
248 
249 void
250 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
251 		       bool is_deriv, bool is_array, bool is_lod,
252 		       LLVMValueRef *coords_arg,
253 		       LLVMValueRef *derivs_arg);
254 
255 
256 LLVMValueRef
257 ac_build_fs_interp(struct ac_llvm_context *ctx,
258 		   LLVMValueRef llvm_chan,
259 		   LLVMValueRef attr_number,
260 		   LLVMValueRef params,
261 		   LLVMValueRef i,
262 		   LLVMValueRef j);
263 
264 LLVMValueRef
265 ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
266 		       LLVMValueRef llvm_chan,
267 		       LLVMValueRef attr_number,
268 		       LLVMValueRef params,
269 		       LLVMValueRef i,
270 		       LLVMValueRef j);
271 
272 LLVMValueRef
273 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
274 		       LLVMValueRef parameter,
275 		       LLVMValueRef llvm_chan,
276 		       LLVMValueRef attr_number,
277 		       LLVMValueRef params);
278 
279 LLVMValueRef
280 ac_build_gep_ptr(struct ac_llvm_context *ctx,
281 	         LLVMValueRef base_ptr,
282 	         LLVMValueRef index);
283 
284 LLVMValueRef
285 ac_build_gep0(struct ac_llvm_context *ctx,
286 	      LLVMValueRef base_ptr,
287 	      LLVMValueRef index);
288 LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
289 				  LLVMValueRef index);
290 
291 void
292 ac_build_indexed_store(struct ac_llvm_context *ctx,
293 		       LLVMValueRef base_ptr, LLVMValueRef index,
294 		       LLVMValueRef value);
295 
296 LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
297 			   LLVMValueRef index);
298 LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
299 				     LLVMValueRef base_ptr, LLVMValueRef index);
300 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
301 				   LLVMValueRef base_ptr, LLVMValueRef index);
302 LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
303 				   LLVMValueRef base_ptr, LLVMValueRef index);
304 
305 void
306 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
307 			    LLVMValueRef rsrc,
308 			    LLVMValueRef vdata,
309 			    unsigned num_channels,
310 			    LLVMValueRef voffset,
311 			    LLVMValueRef soffset,
312 			    unsigned inst_offset,
313 			    unsigned cache_policy);
314 
315 void
316 ac_build_buffer_store_format(struct ac_llvm_context *ctx,
317 			     LLVMValueRef rsrc,
318 			     LLVMValueRef data,
319 			     LLVMValueRef vindex,
320 			     LLVMValueRef voffset,
321 			     unsigned cache_policy);
322 
323 LLVMValueRef
324 ac_build_buffer_load(struct ac_llvm_context *ctx,
325 		     LLVMValueRef rsrc,
326 		     int num_channels,
327 		     LLVMValueRef vindex,
328 		     LLVMValueRef voffset,
329 		     LLVMValueRef soffset,
330 		     unsigned inst_offset,
331 		     unsigned cache_policy,
332 		     bool can_speculate,
333 		     bool allow_smem);
334 
335 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
336 					 LLVMValueRef rsrc,
337 					 LLVMValueRef vindex,
338 					 LLVMValueRef voffset,
339 					 unsigned num_channels,
340 					 unsigned cache_policy,
341 					 bool can_speculate,
342 					 bool d16);
343 
344 LLVMValueRef
345 ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
346 			    LLVMValueRef rsrc,
347 			    LLVMValueRef voffset,
348 			    LLVMValueRef soffset,
349 			    LLVMValueRef immoffset,
350 			    unsigned cache_policy);
351 
352 LLVMValueRef
353 ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
354 			   LLVMValueRef rsrc,
355 			   LLVMValueRef voffset,
356 			   LLVMValueRef soffset,
357 			   LLVMValueRef immoffset,
358 			   unsigned cache_policy);
359 
360 LLVMValueRef
361 ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
362 			     LLVMValueRef rsrc,
363 			     LLVMValueRef vindex,
364 			     LLVMValueRef voffset,
365 			     LLVMValueRef soffset,
366 			     LLVMValueRef immoffset,
367 			     unsigned num_channels,
368 			     unsigned dfmt,
369 			     unsigned nfmt,
370 			     unsigned cache_policy,
371 			     bool can_speculate);
372 
373 LLVMValueRef
374 ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
375 			  LLVMValueRef rsrc,
376 			  LLVMValueRef voffset,
377 			  LLVMValueRef soffset,
378 			  LLVMValueRef immoffset,
379 			  unsigned num_channels,
380 			  unsigned dfmt,
381 			  unsigned nfmt,
382 			  unsigned cache_policy,
383 		          bool can_speculate);
384 
/* Fetch formats, for ac_build_fetch_format.
 *
 * AC_FETCH_FORMAT_FLOAT must stay 0 (used for convenience of encoding in
 * radeonsi).
 *
 * NOTE(review): no ac_build_fetch_format is declared in this header; the
 * `format` argument of ac_build_opencoded_load_format presumably takes these
 * values - confirm.
 */
enum {
	AC_FETCH_FORMAT_FLOAT   = 0,
	AC_FETCH_FORMAT_FIXED   = 1,
	AC_FETCH_FORMAT_UNORM   = 2,
	AC_FETCH_FORMAT_SNORM   = 3,
	AC_FETCH_FORMAT_USCALED = 4,
	AC_FETCH_FORMAT_SSCALED = 5,
	AC_FETCH_FORMAT_UINT    = 6,
	AC_FETCH_FORMAT_SINT    = 7,
};
399 
400 LLVMValueRef
401 ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
402 			       unsigned log_size,
403 			       unsigned num_channels,
404 			       unsigned format,
405 			       bool reverse,
406 			       bool known_aligned,
407 			       LLVMValueRef rsrc,
408 			       LLVMValueRef vindex,
409 			       LLVMValueRef voffset,
410 			       LLVMValueRef soffset,
411 			       unsigned cache_policy,
412 			       bool can_speculate);
413 
414 void
415 ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
416 			     LLVMValueRef rsrc,
417 			     LLVMValueRef vdata,
418 			     LLVMValueRef voffset,
419 			     LLVMValueRef soffset,
420 			     unsigned cache_policy);
421 
422 void
423 ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
424 			    LLVMValueRef rsrc,
425 			    LLVMValueRef vdata,
426 			    LLVMValueRef voffset,
427 			    LLVMValueRef soffset,
428 			    unsigned cache_policy);
429 
430 void
431 ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
432 			      LLVMValueRef rsrc,
433 			      LLVMValueRef vdata,
434 			      LLVMValueRef vindex,
435 			      LLVMValueRef voffset,
436 			      LLVMValueRef soffset,
437 			      LLVMValueRef immoffset,
438 			      unsigned num_channels,
439 			      unsigned dfmt,
440 			      unsigned nfmt,
441 			      unsigned cache_policy);
442 
443 void
444 ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
445 			   LLVMValueRef rsrc,
446 			   LLVMValueRef vdata,
447 			   LLVMValueRef voffset,
448 			   LLVMValueRef soffset,
449 			   LLVMValueRef immoffset,
450 			   unsigned num_channels,
451 			   unsigned dfmt,
452 			   unsigned nfmt,
453 			   unsigned cache_policy);
454 
455 LLVMValueRef
456 ac_get_thread_id(struct ac_llvm_context *ctx);
457 
/* 32-bit masks (presumably the `mask` argument of ac_build_ddxy - confirm). */
#define AC_TID_MASK_TOP_LEFT 0xFFFFFFFC /* clears bits 0 and 1 */
#define AC_TID_MASK_TOP      0xFFFFFFFD /* clears bit 1 */
#define AC_TID_MASK_LEFT     0xFFFFFFFE /* clears bit 0 */
461 
462 LLVMValueRef
463 ac_build_ddxy(struct ac_llvm_context *ctx,
464 	      uint32_t mask,
465 	      int idx,
466 	      LLVMValueRef val);
467 
/* Message IDs. */
#define AC_SENDMSG_GS           2
#define AC_SENDMSG_GS_DONE      3
#define AC_SENDMSG_GS_ALLOC_REQ 9

/* GS operation codes; the operation number sits at bit 4 and above. */
#define AC_SENDMSG_GS_OP_NOP      0x00 /* 0 << 4 */
#define AC_SENDMSG_GS_OP_CUT      0x10 /* 1 << 4 */
#define AC_SENDMSG_GS_OP_EMIT     0x20 /* 2 << 4 */
#define AC_SENDMSG_GS_OP_EMIT_CUT 0x30 /* 3 << 4 */
476 
477 void ac_build_sendmsg(struct ac_llvm_context *ctx,
478 		      uint32_t msg,
479 		      LLVMValueRef wave_id);
480 
481 LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx,
482 			   LLVMValueRef arg,
483 			   LLVMTypeRef dst_type);
484 
485 LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx,
486 			  LLVMValueRef arg,
487 			  LLVMTypeRef dst_type);
488 LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
489 			   LLVMValueRef b);
490 LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
491 			   LLVMValueRef b);
492 LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
493 			   LLVMValueRef b);
494 LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
495 			   LLVMValueRef b);
496 LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
497 LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
498 LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
499 
500 struct ac_export_args {
501 	LLVMValueRef out[4];
502         unsigned target;
503         unsigned enabled_channels;
504         bool compr;
505         bool done;
506         bool valid_mask;
507 };
508 
/* Export using the settings in *a. */
void ac_build_export(struct ac_llvm_context *ctx,
	struct ac_export_args *a);

/* Export variant that takes no argument bundle. */
void ac_build_export_null(struct ac_llvm_context *ctx);
512 
/* Image operation selector; stored in the 4-bit `opcode` field of
 * struct ac_image_args. */
enum ac_image_opcode {
	ac_image_sample         = 0,
	ac_image_gather4        = 1,
	ac_image_load           = 2,
	ac_image_load_mip       = 3,
	ac_image_store          = 4,
	ac_image_store_mip      = 5,
	ac_image_get_lod        = 6,
	ac_image_get_resinfo    = 7,
	ac_image_atomic         = 8,
	ac_image_atomic_cmpswap = 9,
};
525 
/* Atomic operation selector; stored in the 4-bit `atomic` field of
 * struct ac_image_args. */
enum ac_atomic_op {
	ac_atomic_swap     = 0,
	ac_atomic_add      = 1,
	ac_atomic_sub      = 2,
	ac_atomic_smin     = 3,
	ac_atomic_umin     = 4,
	ac_atomic_smax     = 5,
	ac_atomic_umax     = 6,
	ac_atomic_and      = 7,
	ac_atomic_or       = 8,
	ac_atomic_xor      = 9,
	ac_atomic_inc_wrap = 10,
	ac_atomic_dec_wrap = 11,
};
540 
/* Cache policy bits; they match the definitions used by the LLVM
 * intrinsics. Each value is a single bit so they can be OR-ed together. */
enum ac_image_cache_policy {
	ac_glc      = 0x1, /* bit 0: per-CU cache control */
	ac_slc      = 0x2, /* bit 1: global L2 cache control */
	ac_dlc      = 0x4, /* bit 2: per-shader-array cache control */
	ac_swizzled = 0x8, /* bit 3: the access is swizzled, disabling load/store merging */
};
548 
549 struct ac_image_args {
550 	enum ac_image_opcode opcode : 4;
551 	enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
552 	enum ac_image_dim dim : 3;
553 	unsigned dmask : 4;
554 	unsigned cache_policy : 3;
555 	bool unorm : 1;
556 	bool level_zero : 1;
557 	bool d16 : 1; /* data and return values are 16-bit, requires GFX8+ */
558 	unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
559 
560 	LLVMValueRef resource;
561 	LLVMValueRef sampler;
562 	LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */
563 	LLVMValueRef offset;
564 	LLVMValueRef bias;
565 	LLVMValueRef compare;
566 	LLVMValueRef derivs[6];
567 	LLVMValueRef coords[4];
568 	LLVMValueRef lod; // also used by ac_image_get_resinfo
569 	LLVMValueRef min_lod;
570 };
571 
572 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
573 				   struct ac_image_args *a);
574 LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
575 					     LLVMValueRef rsrc);
576 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
577 				    LLVMValueRef args[2]);
578 LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
579 				     LLVMValueRef args[2]);
580 LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
581 				     LLVMValueRef args[2]);
582 LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
583 				 LLVMValueRef args[2], unsigned bits, bool hi);
584 LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
585 				 LLVMValueRef args[2], unsigned bits, bool hi);
586 LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
587 void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
588 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
589 			  LLVMValueRef offset, LLVMValueRef width,
590 			  bool is_signed);
591 LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
592 			   LLVMValueRef s1, LLVMValueRef s2);
593 LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
594 			   LLVMValueRef s1, LLVMValueRef s2);
595 
596 void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
597 
598 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
599 			   unsigned bitsize);
600 
601 LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
602 			    LLVMValueRef src1, LLVMValueRef src2,
603 			    unsigned bitsize);
604 
605 LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
606 			    unsigned bitsize);
607 
608 LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
609 			    unsigned bitsize);
610 
611 LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
612 
613 LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
614 				       LLVMValueRef src0);
615 
616 void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
617 			    LLVMValueRef main_fn,
618 			    uint8_t *vs_output_param_offset,
619 			    uint32_t num_outputs,
620 			    uint32_t skip_output_mask,
621 			    uint8_t *num_param_exports);
622 void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
623 
624 void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
625 LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
626 			 LLVMValueRef dw_addr);
627 void ac_lds_store(struct ac_llvm_context *ctx,
628 		  LLVMValueRef dw_addr, LLVMValueRef value);
629 
630 LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
631 			 LLVMTypeRef dst_type,
632 			 LLVMValueRef src0);
633 
634 LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
635 LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type);
636 
637 void ac_build_bgnloop(struct ac_llvm_context *ctx, int lable_id);
638 void ac_build_break(struct ac_llvm_context *ctx);
639 void ac_build_continue(struct ac_llvm_context *ctx);
640 void ac_build_else(struct ac_llvm_context *ctx, int lable_id);
641 void ac_build_endif(struct ac_llvm_context *ctx, int lable_id);
642 void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id);
643 void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id);
644 void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
645 		 int lable_id);
646 void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
647 		  int lable_id);
648 
649 LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type,
650 			     const char *name);
651 LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
652 				   const char *name);
653 
654 LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
655 			 LLVMTypeRef type);
656 
657 LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
658 			    unsigned count);
659 
660 LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
661 			     unsigned rshift, unsigned bitwidth);
662 
663 void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
664 			      LLVMValueRef *addr, bool is_array_tex);
665 
666 LLVMValueRef
667 ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
668 
669 LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
670 					      LLVMValueRef src, LLVMValueRef lane);
671 
672 LLVMValueRef
673 ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane);
674 
675 LLVMValueRef
676 ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane);
677 
678 LLVMValueRef
679 ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
680 
681 LLVMValueRef
682 ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
683 
684 LLVMValueRef
685 ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
686 
687 LLVMValueRef
688 ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
689 
690 /**
691  * Common arguments for a scan/reduce operation that accumulates per-wave
692  * values across an entire workgroup, while respecting the order of waves.
693  */
694 struct ac_wg_scan {
695 	bool enable_reduce;
696 	bool enable_exclusive;
697 	bool enable_inclusive;
698 	nir_op op;
699 	LLVMValueRef src; /* clobbered! */
700 	LLVMValueRef result_reduce;
701 	LLVMValueRef result_exclusive;
702 	LLVMValueRef result_inclusive;
703 	LLVMValueRef extra;
704 	LLVMValueRef waveidx;
705 	LLVMValueRef numwaves; /* only needed for "reduce" operations */
706 
707 	/* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */
708 	LLVMValueRef scratch;
709 	unsigned maxwaves;
710 };
711 
/* Workgroup wave-scan; all three share one struct ac_wg_scan. */
void ac_build_wg_wavescan_top(struct ac_llvm_context *ctx,
	struct ac_wg_scan *ws);
void ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx,
	struct ac_wg_scan *ws);
void ac_build_wg_wavescan(struct ac_llvm_context *ctx,
	struct ac_wg_scan *ws);

/* Workgroup scan; same top/bottom/combined trio. */
void ac_build_wg_scan_top(struct ac_llvm_context *ctx,
	struct ac_wg_scan *ws);
void ac_build_wg_scan_bottom(struct ac_llvm_context *ctx,
	struct ac_wg_scan *ws);
void ac_build_wg_scan(struct ac_llvm_context *ctx,
	struct ac_wg_scan *ws);
725 
726 LLVMValueRef
727 ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
728 		unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
729 
730 LLVMValueRef
731 ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
732 
733 LLVMValueRef
734 ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
735 		   unsigned bitsize);
736 
737 LLVMValueRef
738 ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
739 		    unsigned bitsize);
740 
741 LLVMValueRef
742 ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0,
743 		      unsigned bitsize);
744 
745 LLVMValueRef
746 ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
747 
748 LLVMValueRef
749 ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
750 
751 LLVMValueRef
752 ac_build_is_helper_invocation(struct ac_llvm_context *ctx);
753 
754 LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
755 			   LLVMValueRef *args, unsigned num_args);
756 
757 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
758 				 LLVMValueRef ptr, LLVMValueRef val,
759 				 const char *sync_scope);
760 
761 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
762 				      LLVMValueRef cmp, LLVMValueRef val,
763 				      const char *sync_scope);
764 
765 void
766 ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
767 		LLVMValueRef stencil, LLVMValueRef samplemask,
768 		struct ac_export_args *args);
769 
770 void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id,
771 				   LLVMValueRef vtx_cnt, LLVMValueRef prim_cnt);
772 
773 struct ac_ngg_prim {
774 	unsigned num_vertices;
775 	LLVMValueRef isnull;
776 	LLVMValueRef index[3];
777 	LLVMValueRef edgeflag[3];
778 	LLVMValueRef passthrough;
779 };
780 
781 LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx,
782 				 const struct ac_ngg_prim *prim);
783 void ac_build_export_prim(struct ac_llvm_context *ctx,
784 			  const struct ac_ngg_prim *prim);
785 
786 static inline LLVMValueRef
ac_get_arg(struct ac_llvm_context * ctx,struct ac_arg arg)787 ac_get_arg(struct ac_llvm_context *ctx, struct ac_arg arg)
788 {
789 	assert(arg.used);
790 	return LLVMGetParam(ctx->main_function, arg.arg_index);
791 }
792 
/* Calling-convention IDs accepted by ac_build_main().
 * NOTE(review): presumably mirror LLVM's AMDGPU calling-convention numbers -
 * confirm against llvm/IR/CallingConv.h. */
enum ac_llvm_calling_convention {
	AC_LLVM_AMDGPU_VS = 87,
	AC_LLVM_AMDGPU_GS = 88,
	AC_LLVM_AMDGPU_PS = 89,
	AC_LLVM_AMDGPU_CS = 90,
	/* 91 and 92 are not used here. */
	AC_LLVM_AMDGPU_HS = 93,
};
800 
801 LLVMValueRef ac_build_main(const struct ac_shader_args *args,
802 			   struct ac_llvm_context *ctx,
803 			   enum ac_llvm_calling_convention convention,
804 			   const char *name, LLVMTypeRef ret_type,
805 			   LLVMModuleRef module);
806 void ac_build_s_endpgm(struct ac_llvm_context *ctx);
807 
808 LLVMValueRef ac_prefix_bitcount(struct ac_llvm_context *ctx,
809 				LLVMValueRef mask, LLVMValueRef index);
810 LLVMValueRef ac_prefix_bitcount_2x64(struct ac_llvm_context *ctx,
811 				     LLVMValueRef mask[2], LLVMValueRef index);
812 void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx,
813 						 LLVMValueRef is_odd,
814 						 LLVMValueRef flatshade_first,
815 						 LLVMValueRef index[3]);
816 
817 #ifdef __cplusplus
818 }
819 #endif
820 
821 #endif
822