1 /*
2  * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3  * Copyright (C) 2020 Collabora Ltd.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #ifndef __AGX_COMPILER_H
26 #define __AGX_COMPILER_H
27 
28 #include "compiler/nir/nir.h"
29 #include "util/u_math.h"
30 #include "util/half_float.h"
31 #include "util/u_dynarray.h"
32 #include "agx_compile.h"
33 #include "agx_opcodes.h"
34 #include "agx_minifloat.h"
35 
/* Compiler debug options. Each option is BITFIELD_BIT() of a distinct index,
 * so presumably several options can be OR'd together into one mask. */
enum agx_dbg {
   AGX_DBG_MSGS        = BITFIELD_BIT(0),
   AGX_DBG_SHADERS     = BITFIELD_BIT(1),
   AGX_DBG_SHADERDB    = BITFIELD_BIT(2),
   AGX_DBG_VERBOSE     = BITFIELD_BIT(3),
   AGX_DBG_INTERNAL    = BITFIELD_BIT(4),
};

/* Declared here, defined in another translation unit.
 * NOTE(review): presumably holds a mask of enum agx_dbg bits -- confirm at the
 * definition. */
extern int agx_debug;
45 
/* Number of addressable registers counted in 16-bit units: r0-r127 inclusive,
 * each viewed as a pair of 16-bit halves, gives 256 registers. Also sizes the
 * per-block regs_out bitset in agx_block. */
#define AGX_NUM_REGS (256)
48 
/* Discriminates what the `value` field of an agx_index refers to. Each kind
 * has a matching constructor in this header. */
enum agx_index_type {
   /* No value; produced by agx_null() */
   AGX_INDEX_NULL = 0,
   /* Ordinary IR value; produced by agx_get_index() */
   AGX_INDEX_NORMAL = 1,
   /* Inline constant; produced by agx_immediate() */
   AGX_INDEX_IMMEDIATE = 2,
   /* Uniform, addressed in half-words; produced by agx_uniform() */
   AGX_INDEX_UNIFORM = 3,
   /* Hardware register, addressed in half-words; produced by agx_register() */
   AGX_INDEX_REGISTER = 4,
   /* Non-SSA NIR register; produced by agx_nir_register() */
   AGX_INDEX_NIR_REGISTER = 5,
};
57 
/* Size class of a value. See agx_size_for_bits() for the mapping from bit
 * counts (note that 1-bit values also use AGX_SIZE_16). */
enum agx_size {
   AGX_SIZE_16 = 0,
   AGX_SIZE_32 = 1,
   AGX_SIZE_64 = 2
};
63 
/* Reference to a value used as an instruction source or destination. Every
 * member is a bitfield; the declared widths add up to 32 bits. Passed and
 * returned by value throughout this header. */
typedef struct {
   /* Sufficient for as many SSA values as we need. Immediates and uniforms fit
    * in 16 bits. Interpretation depends on `type`. */
   unsigned value : 22;

   /* Indicates that this source kills the referenced value (because it is the
    * last use in a block and the source is not live after the block). Set by
    * liveness analysis. */
   bool kill : 1;

   /* Cache hints */
   bool cache : 1;
   bool discard : 1;

   /* src - float modifiers. See agx_abs() / agx_neg() for how they compose. */
   bool abs : 1;
   bool neg : 1;

   /* Size class of the referenced value */
   enum agx_size size : 2;

   /* What kind of thing `value` names */
   enum agx_index_type type : 3;
} agx_index;
84 
85 static inline agx_index
agx_get_index(unsigned value,enum agx_size size)86 agx_get_index(unsigned value, enum agx_size size)
87 {
88    return (agx_index) {
89       .type = AGX_INDEX_NORMAL,
90       .value = value,
91       .size = size
92    };
93 }
94 
95 static inline agx_index
agx_immediate(uint16_t imm)96 agx_immediate(uint16_t imm)
97 {
98    return (agx_index) {
99       .type = AGX_INDEX_IMMEDIATE,
100       .value = imm,
101       .size = AGX_SIZE_32
102    };
103 }
104 
105 static inline agx_index
agx_immediate_f(float f)106 agx_immediate_f(float f)
107 {
108    assert(agx_minifloat_exact(f));
109    return agx_immediate(agx_minifloat_encode(f));
110 }
111 
112 /* in half-words, specify r0h as 1, r1 as 2... */
113 static inline agx_index
agx_register(uint8_t imm,enum agx_size size)114 agx_register(uint8_t imm, enum agx_size size)
115 {
116    return (agx_index) {
117       .type = AGX_INDEX_REGISTER,
118       .value = imm,
119       .size = size
120    };
121 }
122 
123 static inline agx_index
agx_nir_register(unsigned imm,enum agx_size size)124 agx_nir_register(unsigned imm, enum agx_size size)
125 {
126    return (agx_index) {
127       .type = AGX_INDEX_NIR_REGISTER,
128       .value = imm,
129       .size = size
130    };
131 }
132 
133 /* Also in half-words */
134 static inline agx_index
agx_uniform(uint8_t imm,enum agx_size size)135 agx_uniform(uint8_t imm, enum agx_size size)
136 {
137    return (agx_index) {
138       .type = AGX_INDEX_UNIFORM,
139       .value = imm,
140       .size = size
141    };
142 }
143 
144 static inline agx_index
agx_null()145 agx_null()
146 {
147    return (agx_index) { .type = AGX_INDEX_NULL };
148 }
149 
150 static inline agx_index
agx_zero()151 agx_zero()
152 {
153    return agx_immediate(0);
154 }
155 
156 /* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa
157  * = exponent = 0, sign bit set */
158 
159 static inline agx_index
agx_negzero()160 agx_negzero()
161 {
162    return agx_immediate(0x80);
163 }
164 
165 static inline agx_index
agx_abs(agx_index idx)166 agx_abs(agx_index idx)
167 {
168    idx.abs = true;
169    idx.neg = false;
170    return idx;
171 }
172 
173 static inline agx_index
agx_neg(agx_index idx)174 agx_neg(agx_index idx)
175 {
176    idx.neg ^= true;
177    return idx;
178 }
179 
180 /* Replaces an index, preserving any modifiers */
181 
182 static inline agx_index
agx_replace_index(agx_index old,agx_index replacement)183 agx_replace_index(agx_index old, agx_index replacement)
184 {
185    replacement.abs = old.abs;
186    replacement.neg = old.neg;
187    return replacement;
188 }
189 
190 static inline bool
agx_is_null(agx_index idx)191 agx_is_null(agx_index idx)
192 {
193    return idx.type == AGX_INDEX_NULL;
194 }
195 
196 /* Compares equivalence as references */
197 
198 static inline bool
agx_is_equiv(agx_index left,agx_index right)199 agx_is_equiv(agx_index left, agx_index right)
200 {
201    return (left.type == right.type) && (left.value == right.value);
202 }
203 
/* Lengths of the dest[] and src[] arrays embedded in every agx_instr */
#define AGX_MAX_DESTS 1
#define AGX_MAX_SRCS 5
206 
/* Integer comparison conditions, stored in agx_instr.icond. Values 3 and 7 are
 * not assigned here (marked unknown).
 * NOTE(review): the U/S prefixes presumably mean unsigned/signed -- confirm
 * against the ISA notes. */
enum agx_icond {
   AGX_ICOND_UEQ = 0,
   AGX_ICOND_ULT = 1,
   AGX_ICOND_UGT = 2,
   /* unknown */
   AGX_ICOND_SEQ = 4,
   AGX_ICOND_SLT = 5,
   AGX_ICOND_SGT = 6,
   /* unknown */
};
217 
/* Floating-point comparison conditions, stored in agx_instr.fcond. Value 4 is
 * not assigned here (marked unknown).
 * NOTE(review): the exact NaN behaviour of the LTN/GTN variants is not visible
 * in this header -- confirm against the ISA notes. */
enum agx_fcond {
   AGX_FCOND_EQ = 0,
   AGX_FCOND_LT = 1,
   AGX_FCOND_GT = 2,
   AGX_FCOND_LTN = 3,
   /* unknown */
   AGX_FCOND_GE = 5,
   AGX_FCOND_LE = 6,
   AGX_FCOND_GTN = 7,
};
228 
/* Rounding mode, stored in agx_instr.round.
 * NOTE(review): presumably RTZ = round toward zero and RTE = round to nearest
 * even -- confirm. */
enum agx_round {
   AGX_ROUND_RTZ = 0,
   AGX_ROUND_RTE = 1,
};
233 
/* Conversion selectors between integer and float types. Values 2 and 3 are
 * not assigned here.
 * NOTE(review): U/S presumably denote unsigned/signed integers of the stated
 * width; the float width is not encoded in the name -- confirm. */
enum agx_convert {
   AGX_CONVERT_U8_TO_F = 0,
   AGX_CONVERT_S8_TO_F = 1,
   AGX_CONVERT_F_TO_U16 = 4,
   AGX_CONVERT_F_TO_S16 = 5,
   AGX_CONVERT_U16_TO_F = 6,
   AGX_CONVERT_S16_TO_F = 7,
   AGX_CONVERT_F_TO_U32 = 8,
   AGX_CONVERT_F_TO_S32 = 9,
   AGX_CONVERT_U32_TO_F = 10,
   AGX_CONVERT_S32_TO_F = 11
};
246 
/* Level-of-detail mode, stored in agx_instr.lod_mode. The values are sparse.
 * Note that the last two enumerators use the prefix AGX_LOD_ rather than
 * AGX_LOD_MODE_. */
enum agx_lod_mode {
   AGX_LOD_MODE_AUTO_LOD = 0,
   AGX_LOD_MODE_LOD_MIN = 6,
   AGX_LOD_GRAD = 8,
   AGX_LOD_GRAD_MIN = 12
};
253 
/* Texture dimensionality. Values span 0-7, which is what lets agx_instr store
 * this in a 3-bit field. */
enum agx_dim {
   AGX_DIM_TEX_1D = 0,
   AGX_DIM_TEX_1D_ARRAY = 1,
   AGX_DIM_TEX_2D = 2,
   AGX_DIM_TEX_2D_ARRAY = 3,
   AGX_DIM_TEX_2D_MS = 4,
   AGX_DIM_TEX_3D = 5,
   AGX_DIM_TEX_CUBE = 6,
   AGX_DIM_TEX_CUBE_ARRAY = 7
};
264 
/* Forward declare for branch target */
struct agx_block;

/* One instruction of the AGX IR. Instructions are chained through `link`,
 * which the agx_foreach_instr_in_block* iterators use to walk a block's
 * `instructions` list. */
typedef struct {
   /* Must be first */
   struct list_head link;

   enum agx_opcode op;

   /* Data flow */
   agx_index dest[AGX_MAX_DESTS];
   agx_index src[AGX_MAX_SRCS];

   /* Opcode-specific payload. The members share storage, so at most one is
    * meaningful for a given instruction.
    * NOTE(review): which member belongs to which opcode is not visible in this
    * header -- see the opcode definitions. */
   union {
      uint32_t imm;
      uint32_t writeout;
      uint32_t truth_table;
      uint32_t component;
      uint32_t channels;
      uint32_t bfi_mask;
      enum agx_sr sr;
      enum agx_icond icond;
      enum agx_fcond fcond;
      enum agx_format format;
      enum agx_round round;
      enum agx_lod_mode lod_mode;
      struct agx_block *target;
   };

   /* For load varying */
   bool perspective : 1;

   /* Invert icond/fcond */
   bool invert_cond : 1;

   /* TODO: Handle tex ops more efficiently */
   enum agx_dim dim : 3;

   /* Final st_vary op */
   bool last : 1;

   /* Shift for a bitwise or memory op (conflicts with format for memory ops) */
   unsigned shift : 4;

   /* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require
    * scoreboarding (everything but memory load/store and texturing). */
   unsigned scoreboard : 1;

   /* Number of nested control flow layers to jump by */
   unsigned nest : 2;

   /* Output modifiers */
   bool saturate : 1;
   unsigned mask : 4;
} agx_instr;
320 
struct agx_block;

/* A basic block: a list of instructions plus control-flow edges and the
 * per-block results of liveness analysis and register allocation. Blocks are
 * chained through `link` into agx_context.blocks. */
typedef struct agx_block {
   /* Link to next block. Must be first */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned name;

   /* Control flow graph. successors[] holds up to two blocks; unused slots are
    * NULL (agx_foreach_successor stops at the first NULL). */
   struct agx_block *successors[2];
   struct set *predecessors;
   bool unconditional_jumps;

   /* Liveness analysis results */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;

   /* Register allocation */
   BITSET_DECLARE(regs_out, AGX_NUM_REGS);

   /* Offset of the block in the emitted binary */
   off_t offset;

   /** Available for passes to use for metadata */
   uint8_t pass_flags;
} agx_block;
351 
/* Per-shader compilation state: the input NIR, the list of blocks being built,
 * allocation counters and bookkeeping used during instruction selection. */
typedef struct {
   nir_shader *nir;
   gl_shader_stage stage;
   struct list_head blocks; /* list of agx_block */
   struct agx_shader_info *out;
   struct agx_shader_key *key;

   /* Remapping table for varyings indexed by driver_location */
   unsigned varyings[AGX_MAX_VARYINGS];

   /* Handling phi nodes is still TODO while we bring up other parts of the
    * driver. YOLO the mapping of nir_register to fixed hardware registers */
   unsigned *nir_regalloc;

   /* We reserve the top (XXX: that hurts thread count) */
   unsigned max_register;

   /* Place to start pushing new values */
   unsigned push_base;

   /* For creating temporaries: the next unused index, handed out and
    * post-incremented by agx_temp() */
   unsigned alloc;

   /* I don't really understand how writeout ops work yet */
   bool did_writeout;

   /* Has r0l been zeroed yet due to control flow? */
   bool any_cf;

   /** Computed metadata */
   bool has_liveness;

   /* Number of nested control flow structures within the innermost loop. Since
    * NIR is just loop and if-else, this is the number of nested if-else
    * statements in the loop */
   unsigned loop_nesting;

   /* During instruction selection, for inserting control flow */
   agx_block *current_block;
   agx_block *continue_block;
   agx_block *break_block;
   agx_block *after_block;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned spills;
   unsigned fills;
} agx_context;
400 
401 static inline void
agx_remove_instruction(agx_instr * ins)402 agx_remove_instruction(agx_instr *ins)
403 {
404    list_del(&ins->link);
405 }
406 
407 static inline agx_index
agx_temp(agx_context * ctx,enum agx_size size)408 agx_temp(agx_context *ctx, enum agx_size size)
409 {
410    return agx_get_index(ctx->alloc++, size);
411 }
412 
413 static enum agx_size
agx_size_for_bits(unsigned bits)414 agx_size_for_bits(unsigned bits)
415 {
416    switch (bits) {
417    case 1:
418    case 16: return AGX_SIZE_16;
419    case 32: return AGX_SIZE_32;
420    case 64: return AGX_SIZE_64;
421    default: unreachable("Invalid bitsize");
422    }
423 }
424 
425 static inline agx_index
agx_src_index(nir_src * src)426 agx_src_index(nir_src *src)
427 {
428    if (!src->is_ssa) {
429       return agx_nir_register(src->reg.reg->index,
430             agx_size_for_bits(nir_src_bit_size(*src)));
431    }
432 
433    return agx_get_index(src->ssa->index,
434          agx_size_for_bits(nir_src_bit_size(*src)));
435 }
436 
437 static inline agx_index
agx_dest_index(nir_dest * dst)438 agx_dest_index(nir_dest *dst)
439 {
440    if (!dst->is_ssa) {
441       return agx_nir_register(dst->reg.reg->index,
442             agx_size_for_bits(nir_dest_bit_size(*dst)));
443    }
444 
445    return agx_get_index(dst->ssa.index,
446          agx_size_for_bits(nir_dest_bit_size(*dst)));
447 }
448 
/* Iterators for AGX IR.
 *
 * The block iterators walk ctx->blocks, binding each agx_block to `v`. `ctx`
 * is parenthesized in the expansion so that any pointer-valued expression can
 * be passed, matching the instruction iterators' treatment of `block`. */

/* Every block, in list order */
#define agx_foreach_block(ctx, v) \
   list_for_each_entry(agx_block, v, &(ctx)->blocks, link)

/* Every block, in reverse list order */
#define agx_foreach_block_rev(ctx, v) \
   list_for_each_entry_rev(agx_block, v, &(ctx)->blocks, link)

/* Blocks starting at `from`, in list order */
#define agx_foreach_block_from(ctx, from, v) \
   list_for_each_entry_from(agx_block, v, from, &(ctx)->blocks, link)

/* Blocks starting at `from`, in reverse list order */
#define agx_foreach_block_from_rev(ctx, from, v) \
   list_for_each_entry_from_rev(agx_block, v, from, &(ctx)->blocks, link)
462 
/* Per-block instruction iterators. Each walks (block)->instructions, binding
 * every agx_instr to `v`. The _rev forms walk backwards, the _safe forms use
 * the list library's *_safe variants, and the _from forms start at `from`. */

/* Every instruction of the block, in order */
#define agx_foreach_instr_in_block(block, v) \
   list_for_each_entry(agx_instr, v, &(block)->instructions, link)

/* Every instruction of the block, in reverse order */
#define agx_foreach_instr_in_block_rev(block, v) \
   list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)

/* In order; NOTE(review): presumably tolerates removing `v` while iterating,
 * per the usual *_safe list convention -- confirm in the list header */
#define agx_foreach_instr_in_block_safe(block, v) \
   list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)

/* Reverse-order counterpart of agx_foreach_instr_in_block_safe */
#define agx_foreach_instr_in_block_safe_rev(block, v) \
   list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)

/* Instructions of the block starting at `from`, in order */
#define agx_foreach_instr_in_block_from(block, v, from) \
   list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)

/* Instructions of the block starting at `from`, in reverse order */
#define agx_foreach_instr_in_block_from_rev(block, v, from) \
   list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions, link)
480 
/* Whole-shader instruction iterators: an outer loop over the blocks of `ctx`
 * with an inner loop over each block's instructions, binding every agx_instr
 * to `v`. The block iteration variable is literally named `v_block`
 * regardless of what is passed as `v`. */

/* All blocks in order, instructions in order */
#define agx_foreach_instr_global(ctx, v) \
   agx_foreach_block(ctx, v_block) \
      agx_foreach_instr_in_block(v_block, v)

/* All blocks in reverse, instructions in reverse */
#define agx_foreach_instr_global_rev(ctx, v) \
   agx_foreach_block_rev(ctx, v_block) \
      agx_foreach_instr_in_block_rev(v_block, v)

/* As agx_foreach_instr_global, using the _safe per-block iterator */
#define agx_foreach_instr_global_safe(ctx, v) \
   agx_foreach_block(ctx, v_block) \
      agx_foreach_instr_in_block_safe(v_block, v)

/* As agx_foreach_instr_global_rev, using the _safe per-block iterator */
#define agx_foreach_instr_global_safe_rev(ctx, v) \
   agx_foreach_block_rev(ctx, v_block) \
      agx_foreach_instr_in_block_safe_rev(v_block, v)
496 
/* Based on set_foreach, expanded with automatic type casts */

/* Visits the successors of `blk` in array order, binding each to `v`, and
 * stops at the first NULL slot or after both slots. Declares `v` and `_v` in
 * the enclosing scope, so it can be used only once per scope.
 *
 * The step expression checks the bound before dereferencing, so the slot one
 * past the end of successors[] is never read. `blk` is evaluated more than
 * once. */
#define agx_foreach_successor(blk, v) \
   agx_block *v; \
   agx_block **_v; \
   for (_v = (agx_block **) &(blk)->successors[0], \
         v = *_v; \
         v != NULL; \
         v = (++_v < (agx_block **) &(blk)->successors[2]) ? *_v : NULL)
506 
/* Visits every entry of the (blk)->predecessors set, binding each entry's key
 * to `v` cast to agx_block *. Declares `v` and `_entry_<v>` in the enclosing
 * scope, so a given name can be used only once per scope. `blk` is
 * parenthesized so any pointer-valued expression can be passed; it is
 * evaluated more than once. */
#define agx_foreach_predecessor(blk, v) \
   struct set_entry *_entry_##v; \
   agx_block *v; \
   for (_entry_##v = _mesa_set_next_entry((blk)->predecessors, NULL), \
         v = (agx_block *) (_entry_##v ? _entry_##v->key : NULL);  \
         _entry_##v != NULL; \
         _entry_##v = _mesa_set_next_entry((blk)->predecessors, _entry_##v), \
         v = (agx_block *) (_entry_##v ? _entry_##v->key : NULL))
515 
/* Loops `v` over every slot index of (ins)->src, i.e. 0 up to the array
 * length. Slots are visited whether or not they hold a non-null index. `ins`
 * is parenthesized so any pointer-valued expression can be passed. */
#define agx_foreach_src(ins, v) \
   for (unsigned v = 0; v < ARRAY_SIZE((ins)->src); ++v)

/* Same as agx_foreach_src, over the slots of (ins)->dest */
#define agx_foreach_dest(ins, v) \
   for (unsigned v = 0; v < ARRAY_SIZE((ins)->dest); ++v)
521 
522 static inline agx_instr *
agx_prev_op(agx_instr * ins)523 agx_prev_op(agx_instr *ins)
524 {
525    return list_last_entry(&(ins->link), agx_instr, link);
526 }
527 
528 static inline agx_instr *
agx_next_op(agx_instr * ins)529 agx_next_op(agx_instr *ins)
530 {
531    return list_first_entry(&(ins->link), agx_instr, link);
532 }
533 
534 static inline agx_block *
agx_next_block(agx_block * block)535 agx_next_block(agx_block *block)
536 {
537    return list_first_entry(&(block->link), agx_block, link);
538 }
539 
540 static inline agx_block *
agx_exit_block(agx_context * ctx)541 agx_exit_block(agx_context *ctx)
542 {
543    agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
544    assert(!last->successors[0] && !last->successors[1]);
545    return last;
546 }
547 
/* Like in NIR, for use with the builder */

/* Where an agx_cursor points. Selects which member of the cursor's union is
 * valid: `block` for agx_cursor_after_block, `instr` for the other two. */
enum agx_cursor_option {
   /* Append at the end of a block's instruction list */
   agx_cursor_after_block,
   /* Insert immediately before a given instruction */
   agx_cursor_before_instr,
   /* Insert immediately after a given instruction */
   agx_cursor_after_instr
};
555 
/* Insertion point for the builder. Construct with agx_after_block(),
 * agx_before_instr() or agx_after_instr(). */
typedef struct {
   enum agx_cursor_option option;

   /* Anchor of the cursor; which member is valid depends on `option` */
   union {
      agx_block *block;
      agx_instr *instr;
   };
} agx_cursor;
564 
565 static inline agx_cursor
agx_after_block(agx_block * block)566 agx_after_block(agx_block *block)
567 {
568    return (agx_cursor) {
569       .option = agx_cursor_after_block,
570       .block = block
571    };
572 }
573 
574 static inline agx_cursor
agx_before_instr(agx_instr * instr)575 agx_before_instr(agx_instr *instr)
576 {
577    return (agx_cursor) {
578       .option = agx_cursor_before_instr,
579       .instr = instr
580    };
581 }
582 
583 static inline agx_cursor
agx_after_instr(agx_instr * instr)584 agx_after_instr(agx_instr *instr)
585 {
586    return (agx_cursor) {
587       .option = agx_cursor_after_instr,
588       .instr = instr
589    };
590 }
591 
/* IR builder in terms of cursor infrastructure */

/* Pairs the shader being built with the position at which new instructions
 * are inserted. Create with agx_init_builder(). */
typedef struct {
   agx_context *shader;
   agx_cursor cursor;
} agx_builder;
598 
599 static inline agx_builder
agx_init_builder(agx_context * ctx,agx_cursor cursor)600 agx_init_builder(agx_context *ctx, agx_cursor cursor)
601 {
602    return (agx_builder) {
603       .shader = ctx,
604       .cursor = cursor
605    };
606 }
607 
608 /* Insert an instruction at the cursor and move the cursor */
609 
610 static inline void
agx_builder_insert(agx_cursor * cursor,agx_instr * I)611 agx_builder_insert(agx_cursor *cursor, agx_instr *I)
612 {
613    switch (cursor->option) {
614    case agx_cursor_after_instr:
615       list_add(&I->link, &cursor->instr->link);
616       cursor->instr = I;
617       return;
618 
619    case agx_cursor_after_block:
620       list_addtail(&I->link, &cursor->block->instructions);
621       cursor->option = agx_cursor_after_instr;
622       cursor->instr = I;
623       return;
624 
625    case agx_cursor_before_instr:
626       list_addtail(&I->link, &cursor->instr->link);
627       cursor->option = agx_cursor_after_instr;
628       cursor->instr = I;
629       return;
630    }
631 
632    unreachable("Invalid cursor option");
633 }
634 
/* Uniform file management */

/* Defined in another translation unit; returns an agx_index for a system
 * value described by type, size, index and length.
 * NOTE(review): presumably the result is a uniform index into the pushed
 * uniform file and `length` counts elements -- confirm at the definition. */
agx_index
agx_indexed_sysval(agx_context *ctx, enum agx_push_type type, enum agx_size size,
      unsigned index, unsigned length);
640 
/* Routines defined for AIR. All of these are implemented in other translation
 * units; only their signatures are visible here. */

/* Printing of an instruction, a block, or the whole shader to `fp` */
void agx_print_instr(agx_instr *I, FILE *fp);
void agx_print_block(agx_block *block, FILE *fp);
void agx_print_shader(agx_context *ctx, FILE *fp);

/* Passes over the whole shader. NOTE(review): by name these are the
 * optimizer, dead code elimination and register allocation -- confirm. */
void agx_optimizer(agx_context *ctx);
void agx_dce(agx_context *ctx);
void agx_ra(agx_context *ctx);

/* Takes the dynarray `emission`; NOTE(review): presumably appends the packed
 * binary to it -- confirm. */
void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);

/* Liveness analysis entry points */
void agx_compute_liveness(agx_context *ctx);
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
653 
654 #endif
655