1 /*
2 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #ifndef __AGX_COMPILER_H
26 #define __AGX_COMPILER_H
27
28 #include "compiler/nir/nir.h"
29 #include "util/u_math.h"
30 #include "util/half_float.h"
31 #include "util/u_dynarray.h"
32 #include "agx_compile.h"
33 #include "agx_opcodes.h"
34 #include "agx_minifloat.h"
35
/* Debug flags. Each enumerator is a distinct single bit (BITFIELD_BIT), so
 * flags can be OR'd together into one mask.
 * NOTE(review): presumably these are tested against agx_debug, declared
 * below -- confirm against the users of that variable. */
enum agx_dbg {
   AGX_DBG_MSGS        = BITFIELD_BIT(0),
   AGX_DBG_SHADERS     = BITFIELD_BIT(1),
   AGX_DBG_SHADERDB    = BITFIELD_BIT(2),
   AGX_DBG_VERBOSE     = BITFIELD_BIT(3),
   AGX_DBG_INTERNAL    = BITFIELD_BIT(4),
};

/* Declared here only; the definition lives in another translation unit. */
extern int agx_debug;
45
/* Size of the register file in 16-bit units: r0-r127 inclusive, each split
 * into a pair of 16-bit halves, gives 256 addressable registers. */
#define AGX_NUM_REGS (256)
48
/* Discriminator stored in agx_index.type, selecting how agx_index.value is
 * interpreted. The values must fit the 3-bit `type` bitfield of agx_index.
 * AGX_INDEX_NULL is zero, so a zero-initialized agx_index is a null index. */
enum agx_index_type {
   AGX_INDEX_NULL = 0,
   AGX_INDEX_NORMAL = 1,
   AGX_INDEX_IMMEDIATE = 2,
   AGX_INDEX_UNIFORM = 3,
   AGX_INDEX_REGISTER = 4,
   AGX_INDEX_NIR_REGISTER = 5,
};
57
/* Width of a value referenced by an agx_index. The values must fit the 2-bit
 * `size` bitfield of agx_index. */
enum agx_size {
   AGX_SIZE_16 = 0,
   AGX_SIZE_32 = 1,
   AGX_SIZE_64 = 2
};
63
/* Reference to an operand (SSA value, immediate, uniform or register)
 * together with its source modifiers. The bitfield widths below sum to 32.
 * Field order and widths determine the layout, so do not reorder.
 * NOTE(review): `size` and `type` are enum-typed bitfields; whether they are
 * signed is implementation-defined -- confirm for any new compiler. */
typedef struct {
   /* Sufficient for as many SSA values as we need. Immediates and uniforms
    * fit in 16 bits. */
   unsigned value : 22;

   /* Indicates that this source kills the referenced value (because it is the
    * last use in a block and the source is not live after the block). Set by
    * liveness analysis. */
   bool kill : 1;

   /* Cache hints */
   bool cache : 1;
   bool discard : 1;

   /* Float modifiers, meaningful on sources only */
   bool abs : 1;
   bool neg : 1;

   /* Width of the referenced value and the kind of reference held in `value` */
   enum agx_size size : 2;
   enum agx_index_type type : 3;
} agx_index;
84
85 static inline agx_index
agx_get_index(unsigned value,enum agx_size size)86 agx_get_index(unsigned value, enum agx_size size)
87 {
88 return (agx_index) {
89 .type = AGX_INDEX_NORMAL,
90 .value = value,
91 .size = size
92 };
93 }
94
95 static inline agx_index
agx_immediate(uint16_t imm)96 agx_immediate(uint16_t imm)
97 {
98 return (agx_index) {
99 .type = AGX_INDEX_IMMEDIATE,
100 .value = imm,
101 .size = AGX_SIZE_32
102 };
103 }
104
105 static inline agx_index
agx_immediate_f(float f)106 agx_immediate_f(float f)
107 {
108 assert(agx_minifloat_exact(f));
109 return agx_immediate(agx_minifloat_encode(f));
110 }
111
112 /* in half-words, specify r0h as 1, r1 as 2... */
113 static inline agx_index
agx_register(uint8_t imm,enum agx_size size)114 agx_register(uint8_t imm, enum agx_size size)
115 {
116 return (agx_index) {
117 .type = AGX_INDEX_REGISTER,
118 .value = imm,
119 .size = size
120 };
121 }
122
123 static inline agx_index
agx_nir_register(unsigned imm,enum agx_size size)124 agx_nir_register(unsigned imm, enum agx_size size)
125 {
126 return (agx_index) {
127 .type = AGX_INDEX_NIR_REGISTER,
128 .value = imm,
129 .size = size
130 };
131 }
132
133 /* Also in half-words */
134 static inline agx_index
agx_uniform(uint8_t imm,enum agx_size size)135 agx_uniform(uint8_t imm, enum agx_size size)
136 {
137 return (agx_index) {
138 .type = AGX_INDEX_UNIFORM,
139 .value = imm,
140 .size = size
141 };
142 }
143
144 static inline agx_index
agx_null()145 agx_null()
146 {
147 return (agx_index) { .type = AGX_INDEX_NULL };
148 }
149
150 static inline agx_index
agx_zero()151 agx_zero()
152 {
153 return agx_immediate(0);
154 }
155
156 /* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa
157 * = exponent = 0, sign bit set */
158
159 static inline agx_index
agx_negzero()160 agx_negzero()
161 {
162 return agx_immediate(0x80);
163 }
164
165 static inline agx_index
agx_abs(agx_index idx)166 agx_abs(agx_index idx)
167 {
168 idx.abs = true;
169 idx.neg = false;
170 return idx;
171 }
172
173 static inline agx_index
agx_neg(agx_index idx)174 agx_neg(agx_index idx)
175 {
176 idx.neg ^= true;
177 return idx;
178 }
179
180 /* Replaces an index, preserving any modifiers */
181
182 static inline agx_index
agx_replace_index(agx_index old,agx_index replacement)183 agx_replace_index(agx_index old, agx_index replacement)
184 {
185 replacement.abs = old.abs;
186 replacement.neg = old.neg;
187 return replacement;
188 }
189
190 static inline bool
agx_is_null(agx_index idx)191 agx_is_null(agx_index idx)
192 {
193 return idx.type == AGX_INDEX_NULL;
194 }
195
196 /* Compares equivalence as references */
197
198 static inline bool
agx_is_equiv(agx_index left,agx_index right)199 agx_is_equiv(agx_index left, agx_index right)
200 {
201 return (left.type == right.type) && (left.value == right.value);
202 }
203
/* Capacities of the dest[] and src[] arrays embedded in every agx_instr */
#define AGX_MAX_DESTS 1
#define AGX_MAX_SRCS 5
206
/* Integer comparison conditions, stored in agx_instr.icond. The values are
 * explicit and values 3 and 7 are not yet understood.
 * NOTE(review): presumably U = unsigned and S = signed, and the values are
 * hardware encodings -- confirm. */
enum agx_icond {
   AGX_ICOND_UEQ = 0,
   AGX_ICOND_ULT = 1,
   AGX_ICOND_UGT = 2,
   /* unknown */
   AGX_ICOND_SEQ = 4,
   AGX_ICOND_SLT = 5,
   AGX_ICOND_SGT = 6,
   /* unknown */
};
217
/* Floating-point comparison conditions, stored in agx_instr.fcond. The
 * values are explicit and value 4 is not yet understood.
 * NOTE(review): the meaning of the LTN/GTN variants is not established in
 * this header -- confirm before relying on it. */
enum agx_fcond {
   AGX_FCOND_EQ = 0,
   AGX_FCOND_LT = 1,
   AGX_FCOND_GT = 2,
   AGX_FCOND_LTN = 3,
   /* unknown */
   AGX_FCOND_GE = 5,
   AGX_FCOND_LE = 6,
   AGX_FCOND_GTN = 7,
};
228
/* Rounding modes, stored in agx_instr.round.
 * NOTE(review): presumably RTZ = round toward zero and RTE = round to
 * nearest even -- confirm. */
enum agx_round {
   AGX_ROUND_RTZ = 0,
   AGX_ROUND_RTE = 1,
};
233
/* Conversion modes between integer and floating-point values. The values
 * are explicit; 2 and 3 are unassigned here.
 * NOTE(review): presumably hardware encodings -- confirm before
 * renumbering. */
enum agx_convert {
   AGX_CONVERT_U8_TO_F = 0,
   AGX_CONVERT_S8_TO_F = 1,
   AGX_CONVERT_F_TO_U16 = 4,
   AGX_CONVERT_F_TO_S16 = 5,
   AGX_CONVERT_U16_TO_F = 6,
   AGX_CONVERT_S16_TO_F = 7,
   AGX_CONVERT_F_TO_U32 = 8,
   AGX_CONVERT_F_TO_S32 = 9,
   AGX_CONVERT_U32_TO_F = 10,
   AGX_CONVERT_S32_TO_F = 11
};
246
/* Level-of-detail modes, stored in agx_instr.lod_mode.
 * NOTE(review): the last two enumerators use the prefix AGX_LOD_ rather than
 * AGX_LOD_MODE_ like the first two. Renaming them would break existing
 * users, so they are left as-is here. */
enum agx_lod_mode {
   AGX_LOD_MODE_AUTO_LOD = 0,
   AGX_LOD_MODE_LOD_MIN = 6,
   AGX_LOD_GRAD = 8,
   AGX_LOD_GRAD_MIN = 12
};
253
/* Texture dimensionality, stored in the 3-bit agx_instr.dim bitfield. All
 * eight values 0-7 are assigned, so the field has no spare encodings. */
enum agx_dim {
   AGX_DIM_TEX_1D = 0,
   AGX_DIM_TEX_1D_ARRAY = 1,
   AGX_DIM_TEX_2D = 2,
   AGX_DIM_TEX_2D_ARRAY = 3,
   AGX_DIM_TEX_2D_MS = 4,
   AGX_DIM_TEX_3D = 5,
   AGX_DIM_TEX_CUBE = 6,
   AGX_DIM_TEX_CUBE_ARRAY = 7
};
264
/* Forward declare for branch target */
struct agx_block;

/* A single IR instruction: an opcode, its destinations and sources, one
 * opcode-dependent payload (the anonymous union) and a set of modifier
 * bitfields. Instructions are chained into a block through `link`. */
typedef struct {
   /* Must be first */
   struct list_head link;

   enum agx_opcode op;

   /* Data flow */
   agx_index dest[AGX_MAX_DESTS];
   agx_index src[AGX_MAX_SRCS];

   /* Opcode-specific payload. The members share storage, so only one is
    * meaningful for a given instruction. */
   union {
      uint32_t imm;
      uint32_t writeout;
      uint32_t truth_table;
      uint32_t component;
      uint32_t channels;
      uint32_t bfi_mask;
      enum agx_sr sr;
      enum agx_icond icond;
      enum agx_fcond fcond;
      enum agx_format format;
      enum agx_round round;
      enum agx_lod_mode lod_mode;
      struct agx_block *target;
   };

   /* For load varying */
   bool perspective : 1;

   /* Invert icond/fcond */
   bool invert_cond : 1;

   /* TODO: Handle tex ops more efficiently */
   enum agx_dim dim : 3;

   /* Final st_vary op */
   bool last : 1;

   /* Shift for a bitwise or memory op (conflicts with format for memory ops) */
   unsigned shift : 4;

   /* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require
    * scoreboarding (everything but memory load/store and texturing). */
   unsigned scoreboard : 1;

   /* Number of nested control flow layers to jump by */
   unsigned nest : 2;

   /* Output modifiers */
   bool saturate : 1;
   unsigned mask : 4;
} agx_instr;
320
/* NOTE(review): struct agx_block is already forward-declared earlier in this
 * header, so this repeated declaration is redundant (but harmless). */
struct agx_block;

/* A basic block: an ordered list of instructions plus control flow edges and
 * per-block analysis results. Blocks are chained into the shader through
 * `link`. */
typedef struct agx_block {
   /* Link to next block. Must be first */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned name;

   /* Control flow graph: at most two successors, stored in successors[];
    * predecessors are kept in a set. */
   struct agx_block *successors[2];
   struct set *predecessors;
   bool unconditional_jumps;

   /* Liveness analysis results */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;

   /* Register allocation: one bit per register, AGX_NUM_REGS bits in all */
   BITSET_DECLARE(regs_out, AGX_NUM_REGS);

   /* Offset of the block in the emitted binary */
   off_t offset;

   /** Available for passes to use for metadata */
   uint8_t pass_flags;
} agx_block;
351
/* Per-shader compilation state: the NIR input, the list of blocks making up
 * the IR, allocation counters, instruction-selection cursors and statistics. */
typedef struct {
   nir_shader *nir;
   gl_shader_stage stage;
   struct list_head blocks; /* list of agx_block */
   struct agx_shader_info *out;
   struct agx_shader_key *key;

   /* Remapping table for varyings indexed by driver_location */
   unsigned varyings[AGX_MAX_VARYINGS];

   /* Handling phi nodes is still TODO while we bring up other parts of the
    * driver. For now, nir_registers are mapped to fixed hardware registers
    * through this table. */
   unsigned *nir_regalloc;

   /* We reserve the top (XXX: that hurts thread count) */
   unsigned max_register;

   /* Place to start pushing new values */
   unsigned push_base;

   /* For creating temporaries: the next unused value number (see agx_temp) */
   unsigned alloc;

   /* I don't really understand how writeout ops work yet */
   bool did_writeout;

   /* Has r0l been zeroed yet due to control flow? */
   bool any_cf;

   /** Computed metadata */
   bool has_liveness;

   /* Number of nested control flow structures within the innermost loop. Since
    * NIR is just loop and if-else, this is the number of nested if-else
    * statements in the loop */
   unsigned loop_nesting;

   /* During instruction selection, for inserting control flow */
   agx_block *current_block;
   agx_block *continue_block;
   agx_block *break_block;
   agx_block *after_block;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned spills;
   unsigned fills;
} agx_context;
400
401 static inline void
agx_remove_instruction(agx_instr * ins)402 agx_remove_instruction(agx_instr *ins)
403 {
404 list_del(&ins->link);
405 }
406
407 static inline agx_index
agx_temp(agx_context * ctx,enum agx_size size)408 agx_temp(agx_context *ctx, enum agx_size size)
409 {
410 return agx_get_index(ctx->alloc++, size);
411 }
412
413 static enum agx_size
agx_size_for_bits(unsigned bits)414 agx_size_for_bits(unsigned bits)
415 {
416 switch (bits) {
417 case 1:
418 case 16: return AGX_SIZE_16;
419 case 32: return AGX_SIZE_32;
420 case 64: return AGX_SIZE_64;
421 default: unreachable("Invalid bitsize");
422 }
423 }
424
425 static inline agx_index
agx_src_index(nir_src * src)426 agx_src_index(nir_src *src)
427 {
428 if (!src->is_ssa) {
429 return agx_nir_register(src->reg.reg->index,
430 agx_size_for_bits(nir_src_bit_size(*src)));
431 }
432
433 return agx_get_index(src->ssa->index,
434 agx_size_for_bits(nir_src_bit_size(*src)));
435 }
436
437 static inline agx_index
agx_dest_index(nir_dest * dst)438 agx_dest_index(nir_dest *dst)
439 {
440 if (!dst->is_ssa) {
441 return agx_nir_register(dst->reg.reg->index,
442 agx_size_for_bits(nir_dest_bit_size(*dst)));
443 }
444
445 return agx_get_index(dst->ssa.index,
446 agx_size_for_bits(nir_dest_bit_size(*dst)));
447 }
448
/* Iterators for AGX IR */

/* Block iterators over ctx->blocks; `v` is declared by the macro as an
 * agx_block pointer. The _rev forms walk backwards and the _from forms start
 * at block `from` instead of the list head. `ctx` is parenthesized so any
 * pointer expression may be passed, matching the instruction iterators. */

#define agx_foreach_block(ctx, v) \
   list_for_each_entry(agx_block, v, &(ctx)->blocks, link)

#define agx_foreach_block_rev(ctx, v) \
   list_for_each_entry_rev(agx_block, v, &(ctx)->blocks, link)

#define agx_foreach_block_from(ctx, from, v) \
   list_for_each_entry_from(agx_block, v, from, &(ctx)->blocks, link)

#define agx_foreach_block_from_rev(ctx, from, v) \
   list_for_each_entry_from_rev(agx_block, v, from, &(ctx)->blocks, link)
462
/* Instruction iterators over block->instructions; `v` is declared by the
 * macro as an agx_instr pointer. The _rev forms walk backwards, the _safe
 * forms use the list's "safe" iteration variant, and the _from forms start at
 * instruction `from`. Note the argument order of the _from forms is
 * (block, v, from), unlike agx_foreach_block_from's (ctx, from, v). */

#define agx_foreach_instr_in_block(block, v) \
   list_for_each_entry(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_rev(block, v) \
   list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe(block, v) \
   list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe_rev(block, v) \
   list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from(block, v, from) \
   list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from_rev(block, v, from) \
   list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions, link)
480
/* Whole-shader instruction iterators: a block loop with an instruction loop
 * nested inside it. The enclosing block is always named `v_block`, whatever
 * the name given for `v`, so these macros cannot be nested inside one
 * another without shadowing. Because the expansion is two nested loops, a
 * `break` in the body only leaves the current block's instruction loop. */

#define agx_foreach_instr_global(ctx, v) \
   agx_foreach_block(ctx, v_block) \
      agx_foreach_instr_in_block(v_block, v)

#define agx_foreach_instr_global_rev(ctx, v) \
   agx_foreach_block_rev(ctx, v_block) \
      agx_foreach_instr_in_block_rev(v_block, v)

#define agx_foreach_instr_global_safe(ctx, v) \
   agx_foreach_block(ctx, v_block) \
      agx_foreach_instr_in_block_safe(v_block, v)

#define agx_foreach_instr_global_safe_rev(ctx, v) \
   agx_foreach_block_rev(ctx, v_block) \
      agx_foreach_instr_in_block_safe_rev(v_block, v)
496
/* Based on set_foreach, expanded with automatic type casts */

/* Visits the successors of `blk` in array order, stopping at the first NULL
 * entry or after both slots. `v` is declared by the macro as an agx_block
 * pointer.
 *
 * The slot pointer is bounds-checked *before* it is dereferenced, so the
 * macro never reads past successors[1]. The cursor is named after `v` so two
 * loops with different iterator names can share a scope.
 *
 * The expansion is two declarations followed by a `for`, so it must be used
 * where declarations are allowed, not as the sole body of an unbraced
 * if/else/loop. */
#define agx_foreach_successor(blk, v) \
   agx_block *v; \
   agx_block **_succ_##v; \
   for (_succ_##v = (agx_block **) &(blk)->successors[0]; \
        _succ_##v < (agx_block **) &(blk)->successors[2] && \
        (v = *_succ_##v) != NULL; \
        _succ_##v++)
506
/* Visits every entry of blk->predecessors, exposing each key as the
 * agx_block pointer `v` (declared by the macro). Entries are fetched with
 * _mesa_set_next_entry(), and `v` is NULL once the walk has finished.
 *
 * `blk` is parenthesized so any pointer expression may be passed. The
 * expansion is two declarations followed by a `for`, so it must be used
 * where declarations are allowed, not as the sole body of an unbraced
 * if/else/loop. */
#define agx_foreach_predecessor(blk, v) \
   struct set_entry *_entry_##v; \
   agx_block *v; \
   for (_entry_##v = _mesa_set_next_entry((blk)->predecessors, NULL), \
        v = (agx_block *) (_entry_##v ? _entry_##v->key : NULL); \
        _entry_##v != NULL; \
        _entry_##v = _mesa_set_next_entry((blk)->predecessors, _entry_##v), \
        v = (agx_block *) (_entry_##v ? _entry_##v->key : NULL))
515
/* Loops `v` over every index of ins->src, i.e. 0 up to the array's capacity,
 * whether or not a given slot is in use. `ins` is parenthesized so any
 * pointer expression may be passed. */
#define agx_foreach_src(ins, v) \
   for (unsigned v = 0; v < ARRAY_SIZE((ins)->src); ++v)
518
/* Loops `v` over every index of ins->dest, i.e. 0 up to the array's
 * capacity, whether or not a given slot is in use. `ins` is parenthesized so
 * any pointer expression may be passed. */
#define agx_foreach_dest(ins, v) \
   for (unsigned v = 0; v < ARRAY_SIZE((ins)->dest); ++v)
521
522 static inline agx_instr *
agx_prev_op(agx_instr * ins)523 agx_prev_op(agx_instr *ins)
524 {
525 return list_last_entry(&(ins->link), agx_instr, link);
526 }
527
528 static inline agx_instr *
agx_next_op(agx_instr * ins)529 agx_next_op(agx_instr *ins)
530 {
531 return list_first_entry(&(ins->link), agx_instr, link);
532 }
533
534 static inline agx_block *
agx_next_block(agx_block * block)535 agx_next_block(agx_block *block)
536 {
537 return list_first_entry(&(block->link), agx_block, link);
538 }
539
540 static inline agx_block *
agx_exit_block(agx_context * ctx)541 agx_exit_block(agx_context *ctx)
542 {
543 agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
544 assert(!last->successors[0] && !last->successors[1]);
545 return last;
546 }
547
/* Like in NIR, for use with the builder */

/* Where an agx_cursor points: at the end of a block, or directly before or
 * after a specific instruction. This selects which member of the cursor's
 * union is valid. */
enum agx_cursor_option {
   agx_cursor_after_block,
   agx_cursor_before_instr,
   agx_cursor_after_instr
};
555
/* An insertion point in the IR. `option` says how to interpret the union:
 * `block` is valid for agx_cursor_after_block, `instr` for the two
 * instruction-relative options. */
typedef struct {
   enum agx_cursor_option option;

   union {
      agx_block *block;
      agx_instr *instr;
   };
} agx_cursor;
564
565 static inline agx_cursor
agx_after_block(agx_block * block)566 agx_after_block(agx_block *block)
567 {
568 return (agx_cursor) {
569 .option = agx_cursor_after_block,
570 .block = block
571 };
572 }
573
574 static inline agx_cursor
agx_before_instr(agx_instr * instr)575 agx_before_instr(agx_instr *instr)
576 {
577 return (agx_cursor) {
578 .option = agx_cursor_before_instr,
579 .instr = instr
580 };
581 }
582
583 static inline agx_cursor
agx_after_instr(agx_instr * instr)584 agx_after_instr(agx_instr *instr)
585 {
586 return (agx_cursor) {
587 .option = agx_cursor_after_instr,
588 .instr = instr
589 };
590 }
591
/* IR builder in terms of cursor infrastructure */

/* Pairs the shader being built with the position at which new instructions
 * are to be inserted. */
typedef struct {
   agx_context *shader;
   agx_cursor cursor;
} agx_builder;
598
599 static inline agx_builder
agx_init_builder(agx_context * ctx,agx_cursor cursor)600 agx_init_builder(agx_context *ctx, agx_cursor cursor)
601 {
602 return (agx_builder) {
603 .shader = ctx,
604 .cursor = cursor
605 };
606 }
607
608 /* Insert an instruction at the cursor and move the cursor */
609
610 static inline void
agx_builder_insert(agx_cursor * cursor,agx_instr * I)611 agx_builder_insert(agx_cursor *cursor, agx_instr *I)
612 {
613 switch (cursor->option) {
614 case agx_cursor_after_instr:
615 list_add(&I->link, &cursor->instr->link);
616 cursor->instr = I;
617 return;
618
619 case agx_cursor_after_block:
620 list_addtail(&I->link, &cursor->block->instructions);
621 cursor->option = agx_cursor_after_instr;
622 cursor->instr = I;
623 return;
624
625 case agx_cursor_before_instr:
626 list_addtail(&I->link, &cursor->instr->link);
627 cursor->option = agx_cursor_after_instr;
628 cursor->instr = I;
629 return;
630 }
631
632 unreachable("Invalid cursor option");
633 }
634
/* Uniform file management */

/* Declared here, defined in another translation unit.
 * NOTE(review): presumably returns an index into the uniform file for
 * `length` elements of the given push type starting at `index` -- confirm
 * against the definition. */
agx_index
agx_indexed_sysval(agx_context *ctx, enum agx_push_type type, enum agx_size size,
      unsigned index, unsigned length);
640
/* Routines defined for AIR. All of these are declared here and defined in
 * other translation units. */

/* Printers: each takes the object to print and the FILE to write to */
void agx_print_instr(agx_instr *I, FILE *fp);
void agx_print_block(agx_block *block, FILE *fp);
void agx_print_shader(agx_context *ctx, FILE *fp);

/* Whole-shader passes operating on the context.
 * NOTE(review): names suggest optimization, dead code elimination, register
 * allocation and binary packing -- confirm against the definitions. */
void agx_optimizer(agx_context *ctx);
void agx_dce(agx_context *ctx);
void agx_ra(agx_context *ctx);
void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);

/* Liveness analysis entry points */
void agx_compute_liveness(agx_context *ctx);
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
653
654 #endif
655