1 /*
2  * Copyright (C) 2020 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #ifndef __BIFROST_COMPILER_H
28 #define __BIFROST_COMPILER_H
29 
30 #include "bifrost.h"
31 #include "bi_opcodes.h"
32 #include "compiler/nir/nir.h"
33 #include "panfrost/util/pan_ir.h"
34 #include "util/u_math.h"
35 #include "util/half_float.h"
36 
/* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly.
 * To express widen, use the corresponding replicated form, i.e. H01 = identity
 * for widen = none, H00 for widen = h0, B1111 for widen = b1. For lane, also
 * use the replicated form (interpretation is governed by the opcode). For
 * 8-bit lanes with two channels, use the replicated swizzles for the
 * replicated forms (TODO: what about others?). For 8-bit lanes with four
 * channels, use the matching form (TODO: what about others?).
 */
45 
/* Selector for a byte/halfword swizzle of a 32-bit word. The numeric values
 * are relied upon by the helpers that build swizzles arithmetically:
 * bi_swz_16 ORs two lane bits onto BI_SWIZZLE_H00 and bi_byte adds a lane
 * number to BI_SWIZZLE_B0000, so the first eight entries must not be
 * renumbered. All values fit in the 4-bit swizzle field of bi_index. */
enum bi_swizzle {
        /* 16-bit swizzle ordering deliberate for fast compute:
         * value = (first half << 1) | second half */
        BI_SWIZZLE_H00 = 0, /* = B0101 */
        BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */
        BI_SWIZZLE_H10 = 2, /* = B2301 */
        BI_SWIZZLE_H11 = 3, /* = B2323 */

        /* replication order should be maintained for fast compute:
         * value = BI_SWIZZLE_B0000 + replicated byte */
        BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */
        BI_SWIZZLE_B1111 = 5,
        BI_SWIZZLE_B2222 = 6,
        BI_SWIZZLE_B3333 = 7,

        /* totally special for explicit pattern matching */
        BI_SWIZZLE_B0011 = 8, /* +SWZ.v4i8 */
        BI_SWIZZLE_B2233 = 9, /* +SWZ.v4i8 */
        BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */
        BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */

        BI_SWIZZLE_B0022 = 12, /* for b02 lanes */
};
67 
68 /* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant
69  * folding and Valhall constant optimization. */
70 
71 static inline uint32_t
bi_apply_swizzle(uint32_t value,enum bi_swizzle swz)72 bi_apply_swizzle(uint32_t value, enum bi_swizzle swz)
73 {
74    const uint16_t *h = (const uint16_t *) &value;
75    const uint8_t  *b = (const uint8_t *) &value;
76 
77 #define H(h0, h1) (h[h0] | (h[h1] << 16))
78 #define B(b0, b1, b2, b3) (b[b0] | (b[b1] << 8) | (b[b2] << 16) | (b[b3] << 24))
79 
80    switch (swz) {
81    case BI_SWIZZLE_H00: return H(0, 0);
82    case BI_SWIZZLE_H01: return H(0, 1);
83    case BI_SWIZZLE_H10: return H(1, 0);
84    case BI_SWIZZLE_H11: return H(1, 1);
85    case BI_SWIZZLE_B0000: return B(0, 0, 0, 0);
86    case BI_SWIZZLE_B1111: return B(1, 1, 1, 1);
87    case BI_SWIZZLE_B2222: return B(2, 2, 2, 2);
88    case BI_SWIZZLE_B3333: return B(3, 3, 3, 3);
89    case BI_SWIZZLE_B0011: return B(0, 0, 1, 1);
90    case BI_SWIZZLE_B2233: return B(2, 2, 3, 3);
91    case BI_SWIZZLE_B1032: return B(1, 0, 3, 2);
92    case BI_SWIZZLE_B3210: return B(3, 2, 1, 0);
93    case BI_SWIZZLE_B0022: return B(0, 0, 2, 2);
94    }
95 
96 #undef H
97 #undef B
98 
99    unreachable("Invalid swizzle");
100 }
101 
/* Discriminator for bi_index, selecting how bi_index.value is interpreted.
 * Values fit in the 3-bit type field of bi_index. */
enum bi_index_type {
        /* No index; built by bi_null() and tested by bi_is_null() */
        BI_INDEX_NULL = 0,

        /* SSA value or virtual register (see bi_index.reg); built by
         * bi_get_index() */
        BI_INDEX_NORMAL = 1,

        /* Register number, asserted to be below 64 by bi_register() */
        BI_INDEX_REGISTER = 2,

        /* 32-bit immediate; built by bi_imm_u32() */
        BI_INDEX_CONSTANT = 3,

        /* enum bifrost_packed_src passthrough; built by bi_passthrough() */
        BI_INDEX_PASS = 4,

        /* enum bir_fau value; built by bi_fau() */
        BI_INDEX_FAU = 5
};
110 
/* A source or destination operand of a bi_instr, passed around by value. Use
 * the bi_* constructor helpers rather than filling this in by hand. */
typedef struct {
        /* Interpretation depends on type: an SSA/register name for
         * BI_INDEX_NORMAL, a register number for BI_INDEX_REGISTER, the
         * immediate for BI_INDEX_CONSTANT, an enum bifrost_packed_src for
         * BI_INDEX_PASS, or an enum bir_fau for BI_INDEX_FAU */
        uint32_t value;

        /* modifiers, should only be set if applicable for a given instruction.
         * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where
         * applicable, neg plays the role of not */
        bool abs : 1;
        bool neg : 1;

        /* The last use of a value, should be purged from the register cache.
         * Set by liveness analysis. */
        bool discard : 1;

        /* For a source, the swizzle. For a destination, acts a bit like a
         * write mask. Identity for the full 32-bit, H00 for only caring about
         * the lower half, other values unused. */
        enum bi_swizzle swizzle : 4;

        /* Word offset into a vectored index (advanced by bi_word); for FAU
         * indices bi_fau() sets it to 1 to select the high word. Only two
         * bits wide. */
        uint32_t offset : 2;

        /* For BI_INDEX_NORMAL: true for a register, false for an SSA value
         * (see bi_is_ssa) */
        bool reg : 1;

        /* Discriminator for value */
        enum bi_index_type type : 3;
} bi_index;
132 
133 static inline bi_index
bi_get_index(unsigned value,bool is_reg,unsigned offset)134 bi_get_index(unsigned value, bool is_reg, unsigned offset)
135 {
136         return (bi_index) {
137                 .type = BI_INDEX_NORMAL,
138                 .value = value,
139                 .swizzle = BI_SWIZZLE_H01,
140                 .offset = offset,
141                 .reg = is_reg,
142         };
143 }
144 
145 static inline bi_index
bi_register(unsigned reg)146 bi_register(unsigned reg)
147 {
148         assert(reg < 64);
149 
150         return (bi_index) {
151                 .type = BI_INDEX_REGISTER,
152                 .swizzle = BI_SWIZZLE_H01,
153                 .value = reg
154         };
155 }
156 
157 static inline bi_index
bi_imm_u32(uint32_t imm)158 bi_imm_u32(uint32_t imm)
159 {
160         return (bi_index) {
161                 .type = BI_INDEX_CONSTANT,
162                 .swizzle = BI_SWIZZLE_H01,
163                 .value = imm
164         };
165 }
166 
167 static inline bi_index
bi_imm_f32(float imm)168 bi_imm_f32(float imm)
169 {
170         return bi_imm_u32(fui(imm));
171 }
172 
173 static inline bi_index
bi_null()174 bi_null()
175 {
176         return (bi_index) { .type = BI_INDEX_NULL };
177 }
178 
179 static inline bi_index
bi_zero()180 bi_zero()
181 {
182         return bi_imm_u32(0);
183 }
184 
185 static inline bi_index
bi_passthrough(enum bifrost_packed_src value)186 bi_passthrough(enum bifrost_packed_src value)
187 {
188         return (bi_index) {
189                 .type = BI_INDEX_PASS,
190                 .swizzle = BI_SWIZZLE_H01,
191                 .value = value
192         };
193 }
194 
195 /* Read back power-efficent garbage, TODO maybe merge with null? */
196 static inline bi_index
bi_dontcare()197 bi_dontcare()
198 {
199         return bi_passthrough(BIFROST_SRC_FAU_HI);
200 }
201 
202 /* Extracts a word from a vectored index */
203 static inline bi_index
bi_word(bi_index idx,unsigned component)204 bi_word(bi_index idx, unsigned component)
205 {
206         idx.offset += component;
207         return idx;
208 }
209 
210 /* Helps construct swizzles */
211 static inline bi_index
bi_swz_16(bi_index idx,bool x,bool y)212 bi_swz_16(bi_index idx, bool x, bool y)
213 {
214         assert(idx.swizzle == BI_SWIZZLE_H01);
215         idx.swizzle = BI_SWIZZLE_H00 | (x << 1) | y;
216         return idx;
217 }
218 
219 static inline bi_index
bi_half(bi_index idx,bool upper)220 bi_half(bi_index idx, bool upper)
221 {
222         return bi_swz_16(idx, upper, upper);
223 }
224 
225 static inline bi_index
bi_byte(bi_index idx,unsigned lane)226 bi_byte(bi_index idx, unsigned lane)
227 {
228         assert(idx.swizzle == BI_SWIZZLE_H01);
229         assert(lane < 4);
230         idx.swizzle = BI_SWIZZLE_B0000 + lane;
231         return idx;
232 }
233 
234 static inline bi_index
bi_abs(bi_index idx)235 bi_abs(bi_index idx)
236 {
237         idx.abs = true;
238         return idx;
239 }
240 
241 static inline bi_index
bi_neg(bi_index idx)242 bi_neg(bi_index idx)
243 {
244         idx.neg ^= true;
245         return idx;
246 }
247 
248 static inline bi_index
bi_discard(bi_index idx)249 bi_discard(bi_index idx)
250 {
251         idx.discard = true;
252         return idx;
253 }
254 
255 /* Additive identity in IEEE 754 arithmetic */
256 static inline bi_index
bi_negzero()257 bi_negzero()
258 {
259         return bi_neg(bi_zero());
260 }
261 
262 /* Replaces an index, preserving any modifiers */
263 
264 static inline bi_index
bi_replace_index(bi_index old,bi_index replacement)265 bi_replace_index(bi_index old, bi_index replacement)
266 {
267         replacement.abs = old.abs;
268         replacement.neg = old.neg;
269         replacement.swizzle = old.swizzle;
270         return replacement;
271 }
272 
273 /* Remove any modifiers. This has the property:
274  *
275  *     replace_index(x, strip_index(x)) = x
276  *
277  * This ensures it is suitable to use when lowering sources to moves */
278 
279 static inline bi_index
bi_strip_index(bi_index index)280 bi_strip_index(bi_index index)
281 {
282         index.abs = index.neg = false;
283         index.swizzle = BI_SWIZZLE_H01;
284         return index;
285 }
286 
/* For bitwise instructions, where the neg modifier plays the role of not.
 * Expands to bi_neg, so it toggles the modifier rather than forcing it on. */
#define bi_not(x) bi_neg(x)
289 
290 static inline bi_index
bi_imm_u8(uint8_t imm)291 bi_imm_u8(uint8_t imm)
292 {
293         return bi_byte(bi_imm_u32(imm), 0);
294 }
295 
296 static inline bi_index
bi_imm_u16(uint16_t imm)297 bi_imm_u16(uint16_t imm)
298 {
299         return bi_half(bi_imm_u32(imm), false);
300 }
301 
302 static inline bi_index
bi_imm_uintN(uint32_t imm,unsigned sz)303 bi_imm_uintN(uint32_t imm, unsigned sz)
304 {
305         assert(sz == 8 || sz == 16 || sz == 32);
306         return (sz == 8) ? bi_imm_u8(imm) :
307                 (sz == 16) ? bi_imm_u16(imm) :
308                 bi_imm_u32(imm);
309 }
310 
311 static inline bi_index
bi_imm_f16(float imm)312 bi_imm_f16(float imm)
313 {
314         return bi_imm_u16(_mesa_float_to_half(imm));
315 }
316 
317 static inline bool
bi_is_null(bi_index idx)318 bi_is_null(bi_index idx)
319 {
320         return idx.type == BI_INDEX_NULL;
321 }
322 
323 static inline bool
bi_is_ssa(bi_index idx)324 bi_is_ssa(bi_index idx)
325 {
326         return idx.type == BI_INDEX_NORMAL && !idx.reg;
327 }
328 
329 /* Compares equivalence as references. Does not compare offsets, swizzles, or
330  * modifiers. In other words, this forms bi_index equivalence classes by
331  * partitioning memory. E.g. -abs(foo[1].yx) == foo.xy but foo != bar */
332 
333 static inline bool
bi_is_equiv(bi_index left,bi_index right)334 bi_is_equiv(bi_index left, bi_index right)
335 {
336         return (left.type == right.type) &&
337                 (left.reg == right.reg) &&
338                 (left.value == right.value);
339 }
340 
341 /* A stronger equivalence relation that requires the indices access the
342  * same offset, useful for RA/scheduling to see what registers will
343  * correspond to */
344 
345 static inline bool
bi_is_word_equiv(bi_index left,bi_index right)346 bi_is_word_equiv(bi_index left, bi_index right)
347 {
348         return bi_is_equiv(left, right) && left.offset == right.offset;
349 }
350 
/* Sizes of the dest[] and src[] arrays in bi_instr */
#define BI_MAX_DESTS 2
#define BI_MAX_SRCS 4
353 
/* A single instruction in the Bifrost IR. The common fields come first; the
 * trailing anonymous unions hold opcode-specific modifiers and immediates
 * whose meaning depends on op. */
typedef struct {
        /* Must be first */
        struct list_head link;

        enum bi_opcode op;

        /* Data flow */
        bi_index dest[BI_MAX_DESTS];
        bi_index src[BI_MAX_SRCS];

        /* For a branch */
        struct bi_block *branch_target;

        /* These don't fit neatly with anything else.. */
        enum bi_register_format register_format;
        enum bi_vecsize vecsize;

        /* Can we spill the value written here? Used to prevent
         * useless double fills */
        bool no_spill;

        /* Override table, inducing a DTSEL_IMM pair if nonzero */
        enum bi_table table;

        /* Everything after this MUST NOT be accessed directly, since
         * interpretation depends on opcodes */

        /* Destination modifiers */
        union {
                enum bi_clamp clamp;
                bool saturate;
                bool not_result;
                unsigned dest_mod;
        };

        /* Immediates. All seen alone in an instruction, except for varying/texture
         * which are specified jointly for VARTEX */
        union {
                uint32_t shift;
                uint32_t fill;
                uint32_t index;
                uint32_t attribute_index;
                int32_t branch_offset;

                struct {
                        uint32_t varying_index;
                        uint32_t sampler_index;
                        uint32_t texture_index;
                };

                /* TEXC, ATOM_CX: # of staging registers used */
                uint32_t sr_count;
        };

        /* Modifiers specific to particular instructions are thrown in a union */
        union {
                enum bi_adj adj; /* FEXP_TABLE.u4 */
                enum bi_atom_opc atom_opc; /* atomics */
                enum bi_func func; /* FPOW_SC_DET */
                enum bi_function function; /* LD_VAR_FLAT */
                enum bi_mux mux; /* MUX */
                enum bi_sem sem; /* FMAX, FMIN */
                enum bi_source source; /* LD_GCLK */
                bool scale; /* VN_ASST2, FSINCOS_OFFSET */
                bool offset; /* FSIN_TABLE, FOCS_TABLE */
                bool mask; /* CLZ */
                bool threads; /* IMULD, IMOV_FMA */
                bool combine; /* BRANCHC */
                bool format; /* LEA_TEX */

                struct {
                        enum bi_special special; /* FADD_RSCALE, FMA_RSCALE */
                        enum bi_round round; /* FMA, converts, FADD, _RSCALE, etc */
                };

                struct {
                        enum bi_result_type result_type; /* FCMP, ICMP */
                        enum bi_cmpf cmpf; /* CSEL, FCMP, ICMP, BRANCH */
                };

                struct {
                        enum bi_stack_mode stack_mode; /* JUMP_EX */
                        bool test_mode;
                };

                struct {
                        enum bi_seg seg; /* LOAD, STORE, SEG_ADD, SEG_SUB */
                        bool preserve_null; /* SEG_ADD, SEG_SUB */
                        enum bi_extend extend; /* LOAD, IMUL */
                };

                struct {
                        enum bi_sample sample; /* VAR_TEX, LD_VAR */
                        enum bi_update update; /* VAR_TEX, LD_VAR */
                        enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */
                        bool skip; /* VAR_TEX, TEXS, TEXC */
                        bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */
                };

                /* Maximum size, for hashing. NOTE(review): presumably must
                 * stay at least as large as every other member of this
                 * union -- confirm when adding modifiers */
                unsigned flags[5];

                struct {
                        enum bi_subgroup subgroup; /* WMASK, CLPER */
                        enum bi_inactive_result inactive_result; /* CLPER */
                        enum bi_lane_op lane_op; /* CLPER */
                };

                struct {
                        bool z; /* ZS_EMIT */
                        bool stencil; /* ZS_EMIT */
                };

                struct {
                        bool h; /* VN_ASST1.f16 */
                        bool l; /* VN_ASST1.f16 */
                };

                struct {
                        bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */
                        bool result_word;
                };

                struct {
                        bool sqrt; /* FREXPM */
                        bool log; /* FREXPM */
                };

                struct {
                        enum bi_mode mode; /* FLOG_TABLE */
                        enum bi_precision precision; /* FLOG_TABLE */
                        bool divzero; /* FRSQ_APPROX, FRSQ */
                };
        };
} bi_instr;
489 
/* Represents the assignment of slots for a given bi_tuple. Embedded by value
 * in bi_tuple as its regs member. */

typedef struct {
        /* Register to assign to each slot */
        unsigned slot[4];

        /* Read slots can be disabled */
        bool enabled[2];

        /* Configuration for slots 2/3 */
        struct bifrost_reg_ctrl_23 slot23;

        /* Fast-Access-Uniform RAM index */
        uint8_t fau_idx;

        /* Whether writes are actually for the last instruction */
        bool first_instruction;
} bi_registers;
508 
/* A bi_tuple contains two paired instruction pointers, one for the FMA unit
 * and one for the ADD unit. If a slot is unfilled, leave it NULL; the emitter
 * will fill in a nop. Instructions reference registers via slots which are
 * assigned per tuple.
 */

typedef struct {
        /* NOTE(review): bi_registers carries a fau_idx of its own; the
         * relationship between the two is not visible here -- confirm */
        uint8_t fau_idx;

        /* Slot assignment for this tuple */
        bi_registers regs;

        /* Paired instructions, either may be NULL */
        bi_instr *fma;
        bi_instr *add;
} bi_tuple;
520 
521 struct bi_block;
522 
/* A clause: up to 8 tuples together with the constants they read and
 * clause-level scheduling metadata. Clauses are linked into the clauses list
 * of their bi_block. */
typedef struct {
        struct list_head link;

        /* Link back up for branch calculations */
        struct bi_block *block;

        /* Architectural limit of 8 tuples/clause */
        unsigned tuple_count;
        bi_tuple tuples[8];

        /* For scoreboarding -- the clause ID (this is not globally unique!)
         * and its dependencies in terms of other clauses, computed during
         * scheduling and used when emitting code. Dependencies expressed as a
         * bitfield matching the hardware, except shifted by a clause (the
         * shift back to the ISA's off-by-one encoding is worked out when
         * emitting clauses) */
        unsigned scoreboard_id;
        uint8_t dependencies;

        /* See ISA header for description */
        enum bifrost_flow flow_control;

        /* Can we prefetch the next clause? Usually it makes sense, except for
         * clauses ending in unconditional branches */
        bool next_clause_prefetch;

        /* Assigned data register */
        unsigned staging_register;

        /* Corresponds to the usual bit but shifted by a clause */
        bool staging_barrier;

        /* Constants read by this clause. ISA limit. Must satisfy:
         *
         *      constant_count + tuple_count <= 13
         *
         * Also implicitly constant_count <= tuple_count since a tuple only
         * reads a single constant.
         */
        uint64_t constants[8];
        unsigned constant_count;

        /* Index of a constant to be PC-relative */
        unsigned pcrel_idx;

        /* Branches encode a constant offset relative to the program counter
         * with some magic flags. By convention, if there is a branch, its
         * constant will be last. Set this flag to indicate this is required.
         */
        bool branch_constant;

        /* Unique in a clause */
        enum bifrost_message_type message_type;
        bi_instr *message;

        /* Discard helper threads */
        bool td;
} bi_clause;
581 
/* A basic block of the control flow graph. It holds instructions before
 * scheduling and clauses afterwards (see the scheduled flag). */
typedef struct bi_block {
        /* Link to next block. Must be first for mir_get_block */
        struct list_head link;

        /* List of instructions emitted for the current block */
        struct list_head instructions;

        /* Index of the block in source order */
        unsigned name;

        /* Control flow graph. At most two successors; bi_foreach_successor
         * stops at the first NULL entry. */
        struct bi_block *successors[2];
        struct set *predecessors;
        bool unconditional_jumps;

        /* Per 32-bit word live masks for the block indexed by node */
        uint8_t *live_in;
        uint8_t *live_out;

        /* If true, uses clauses; if false, uses instructions */
        bool scheduled;
        struct list_head clauses; /* list of bi_clause */

        /* Post-RA liveness */
        uint64_t reg_live_in, reg_live_out;

        /* Flags available for pass-internal use */
        uint8_t pass_flags;
} bi_block;
611 
/* State threaded through the compiler passes: the input NIR shader, the list
 * of bi_blocks built from it, temporary allocators and statistics.
 * NOTE(review): presumably one context per shader compile -- confirm. */
typedef struct {
       const struct panfrost_compile_inputs *inputs;
       nir_shader *nir;
       struct pan_shader_info *info;
       gl_shader_stage stage;
       struct list_head blocks; /* list of bi_block */
       struct hash_table_u64 *sysval_to_id;
       uint32_t quirks;
       unsigned arch;

       /* During NIR->BIR */
       bi_block *current_block;
       bi_block *after_block;
       bi_block *break_block;
       bi_block *continue_block;
       bool emitted_atest;

       /* For creating temporaries: next free SSA name (bi_temp) and next
        * free register name (bi_temp_reg) */
       unsigned ssa_alloc;
       unsigned reg_alloc;

       /* Analysis results */
       bool has_liveness;

       /* Mask of UBOs that need to be uploaded */
       uint32_t ubo_mask;

       /* Stats for shader-db */
       unsigned instruction_count;
       unsigned loop_count;
       unsigned spills;
       unsigned fills;
} bi_context;
645 
646 static inline void
bi_remove_instruction(bi_instr * ins)647 bi_remove_instruction(bi_instr *ins)
648 {
649         list_del(&ins->link);
650 }
651 
/* Values stored in a BI_INDEX_FAU index (see bi_fau). The low values name
 * special slots; BIR_FAU_UNIFORM and BIR_FAU_IMMEDIATE are flag bits. */
enum bir_fau {
        BIR_FAU_ZERO = 0,
        BIR_FAU_LANE_ID = 1,
        BIR_FAU_WARP_ID = 2,
        BIR_FAU_CORE_ID = 3,
        BIR_FAU_FB_EXTENT = 4,
        BIR_FAU_ATEST_PARAM = 5,
        BIR_FAU_SAMPLE_POS_ARRAY = 6,
        BIR_FAU_BLEND_0 = 8,
        /* blend descs 1 - 7 */
        BIR_FAU_TYPE_MASK = 15,

        /* Valhall only */
        BIR_FAU_TLS_PTR = 16,
        BIR_FAU_WLS_PTR = 17,
        BIR_FAU_PROGRAM_COUNTER = 18,

        BIR_FAU_UNIFORM = (1 << 7),
        /* Look up table on Valhall */
        BIR_FAU_IMMEDIATE = (1 << 8),

};
674 
675 static inline bi_index
bi_fau(enum bir_fau value,bool hi)676 bi_fau(enum bir_fau value, bool hi)
677 {
678         return (bi_index) {
679                 .type = BI_INDEX_FAU,
680                 .value = value,
681                 .swizzle = BI_SWIZZLE_H01,
682                 .offset = hi ? 1 : 0
683         };
684 }
685 
686 static inline unsigned
bi_max_temp(bi_context * ctx)687 bi_max_temp(bi_context *ctx)
688 {
689         return (MAX2(ctx->reg_alloc, ctx->ssa_alloc) + 2) << 1;
690 }
691 
692 static inline bi_index
bi_temp(bi_context * ctx)693 bi_temp(bi_context *ctx)
694 {
695         return bi_get_index(ctx->ssa_alloc++, false, 0);
696 }
697 
698 static inline bi_index
bi_temp_reg(bi_context * ctx)699 bi_temp_reg(bi_context *ctx)
700 {
701         return bi_get_index(ctx->reg_alloc++, true, 0);
702 }
703 
/* NIR booleans are 1-bit (0/1). For now, backend IR booleans are N-bit
 * (0/~0) where N depends on the context. This requires us to sign-extend
 * when converting constants from NIR to the backend IR: a nonzero 1-bit
 * constant becomes all ones, and every other constant is returned unchanged.
 */
static inline uint32_t
bi_extend_constant(uint32_t constant, unsigned bit_size)
{
        /* Anything that is not a true boolean passes straight through */
        if (bit_size != 1 || constant == 0)
                return constant;

        return ~(uint32_t) 0;
}
716 
717 /* Inline constants automatically, will be lowered out by bi_lower_fau where a
718  * constant is not allowed. load_const_to_scalar gaurantees that this makes
719  * sense */
720 
721 static inline bi_index
bi_src_index(nir_src * src)722 bi_src_index(nir_src *src)
723 {
724         if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32) {
725                 uint32_t v = nir_src_as_uint(*src);
726 
727                 return bi_imm_u32(bi_extend_constant(v, nir_src_bit_size(*src)));
728         } else if (src->is_ssa) {
729                 return bi_get_index(src->ssa->index, false, 0);
730         } else {
731                 assert(!src->reg.indirect);
732                 return bi_get_index(src->reg.reg->index, true, 0);
733         }
734 }
735 
736 static inline bi_index
bi_dest_index(nir_dest * dst)737 bi_dest_index(nir_dest *dst)
738 {
739         if (dst->is_ssa)
740                 return bi_get_index(dst->ssa.index, false, 0);
741         else {
742                 assert(!dst->reg.indirect);
743                 return bi_get_index(dst->reg.reg->index, true, 0);
744         }
745 }
746 
747 static inline unsigned
bi_get_node(bi_index index)748 bi_get_node(bi_index index)
749 {
750         if (bi_is_null(index) || index.type != BI_INDEX_NORMAL)
751                 return ~0;
752         else
753                 return (index.value << 1) | index.reg;
754 }
755 
756 static inline bi_index
bi_node_to_index(unsigned node,unsigned node_count)757 bi_node_to_index(unsigned node, unsigned node_count)
758 {
759         assert(node < node_count);
760         assert(node_count < ~0);
761 
762         return bi_get_index(node >> 1, node & PAN_IS_REG, 0);
763 }
764 
/* Iterators for Bifrost IR */

/* Block iterators over ctx->blocks: forwards, backwards, and resuming from a
 * given block in either direction. ctx is parenthesized so that any pointer
 * expression (e.g. &local_ctx) may be passed. */

#define bi_foreach_block(ctx, v) \
        list_for_each_entry(bi_block, v, &(ctx)->blocks, link)

#define bi_foreach_block_rev(ctx, v) \
        list_for_each_entry_rev(bi_block, v, &(ctx)->blocks, link)

#define bi_foreach_block_from(ctx, from, v) \
        list_for_each_entry_from(bi_block, v, from, &(ctx)->blocks, link)

#define bi_foreach_block_from_rev(ctx, from, v) \
        list_for_each_entry_from_rev(bi_block, v, from, &(ctx)->blocks, link)
778 
/* Instruction iterators over block->instructions. The _safe variants wrap
 * the list_for_each_entry_safe* helpers; the _from variants resume from a
 * given instruction. */

#define bi_foreach_instr_in_block(block, v) \
        list_for_each_entry(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_rev(block, v) \
        list_for_each_entry_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe(block, v) \
        list_for_each_entry_safe(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe_rev(block, v) \
        list_for_each_entry_safe_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from(block, v, from) \
        list_for_each_entry_from(bi_instr, v, from, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from_rev(block, v, from) \
        list_for_each_entry_from_rev(bi_instr, v, from, &(block)->instructions, link)

/* Clause iterators over block->clauses, mirroring the instruction iterators
 * above */

#define bi_foreach_clause_in_block(block, v) \
        list_for_each_entry(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_rev(block, v) \
        list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_safe(block, v) \
        list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from(block, v, from) \
        list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from_rev(block, v, from) \
        list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link)
811 
/* Whole-shader instruction iterators: walk every block of ctx and every
 * instruction within each block. The block cursor is always named v_block. */

#define bi_foreach_instr_global(ctx, v) \
        bi_foreach_block(ctx, v_block) \
                bi_foreach_instr_in_block(v_block, v)

#define bi_foreach_instr_global_rev(ctx, v) \
        bi_foreach_block_rev(ctx, v_block) \
                bi_foreach_instr_in_block_rev(v_block, v)

#define bi_foreach_instr_global_safe(ctx, v) \
        bi_foreach_block(ctx, v_block) \
                bi_foreach_instr_in_block_safe(v_block, v)

/* The per-block reverse safe iterator is spelled ..._safe_rev, not
 * ..._rev_safe */
#define bi_foreach_instr_global_rev_safe(ctx, v) \
        bi_foreach_block_rev(ctx, v_block) \
                bi_foreach_instr_in_block_safe_rev(v_block, v)
827 
/* Visits the non-NULL instructions of a tuple, FMA first and then ADD. Starts
 * at fma, or at add if fma is NULL (GNU `?:` shorthand); after visiting add
 * the cursor becomes NULL, which ends the loop. */
#define bi_foreach_instr_in_tuple(tuple, v) \
        for (bi_instr *v = (tuple)->fma ?: (tuple)->add; \
                        v != NULL; \
                        v = (v == (tuple)->add) ? NULL : (tuple)->add)
832 
/* Iterates over the successors of blk, stopping at the first NULL entry or
 * after both slots have been visited. The cursor is only advanced onto a slot
 * after checking it lies inside successors[], so the array is never read one
 * past its end. Declares v and _v in the enclosing scope, so it can be used
 * at most once per scope and must not be the sole body of an unbraced
 * if/else. After the loop v is NULL. */
#define bi_foreach_successor(blk, v) \
        bi_block *v; \
        bi_block **_v; \
        for (_v = &(blk)->successors[0], \
                v = *_v; \
                v != NULL; \
                v = (++_v < &(blk)->successors[2]) ? *_v : NULL)

/* Based on set_foreach, expanded with automatic type casts. Iterates over the
 * predecessor set of blk, exposing each key as a bi_block *v. blk is
 * parenthesized so any pointer expression may be passed. Declares v and
 * _entry_<v> in the enclosing scope. */

#define bi_foreach_predecessor(blk, v) \
        struct set_entry *_entry_##v; \
        bi_block *v; \
        for (_entry_##v = _mesa_set_next_entry((blk)->predecessors, NULL), \
                v = (bi_block *) (_entry_##v ? _entry_##v->key : NULL);  \
                _entry_##v != NULL; \
                _entry_##v = _mesa_set_next_entry((blk)->predecessors, _entry_##v), \
                v = (bi_block *) (_entry_##v ? _entry_##v->key : NULL))
851 
/* Index iterators over every slot of an instruction's src[] / dest[] arrays
 * (v is the slot number, not the bi_index). Unused slots are visited too.
 * ins is parenthesized so any pointer expression may be passed. */

#define bi_foreach_src(ins, v) \
        for (unsigned v = 0; v < ARRAY_SIZE((ins)->src); ++v)

#define bi_foreach_dest(ins, v) \
        for (unsigned v = 0; v < ARRAY_SIZE((ins)->dest); ++v)

/* Visits every (instruction, source slot) pair of a tuple */
#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s) \
        bi_foreach_instr_in_tuple(tuple, ins) \
                bi_foreach_src(ins, s)
861 
862 static inline bi_instr *
bi_prev_op(bi_instr * ins)863 bi_prev_op(bi_instr *ins)
864 {
865         return list_last_entry(&(ins->link), bi_instr, link);
866 }
867 
868 static inline bi_instr *
bi_next_op(bi_instr * ins)869 bi_next_op(bi_instr *ins)
870 {
871         return list_first_entry(&(ins->link), bi_instr, link);
872 }
873 
874 static inline bi_block *
bi_next_block(bi_block * block)875 bi_next_block(bi_block *block)
876 {
877         return list_first_entry(&(block->link), bi_block, link);
878 }
879 
880 static inline bi_block *
bi_entry_block(bi_context * ctx)881 bi_entry_block(bi_context *ctx)
882 {
883         return list_first_entry(&ctx->blocks, bi_block, link);
884 }
885 
886 /* BIR manipulation */
887 
888 bool bi_has_arg(const bi_instr *ins, bi_index arg);
889 unsigned bi_count_read_registers(const bi_instr *ins, unsigned src);
890 unsigned bi_count_write_registers(const bi_instr *ins, unsigned dest);
891 bool bi_is_regfmt_16(enum bi_register_format fmt);
892 unsigned bi_writemask(const bi_instr *ins, unsigned dest);
893 bi_clause * bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause);
894 bool bi_side_effects(enum bi_opcode op);
895 bool bi_reconverge_branches(bi_block *block);
896 
897 void bi_print_instr(const bi_instr *I, FILE *fp);
898 void bi_print_slots(bi_registers *regs, FILE *fp);
899 void bi_print_tuple(bi_tuple *tuple, FILE *fp);
900 void bi_print_clause(bi_clause *clause, FILE *fp);
901 void bi_print_block(bi_block *block, FILE *fp);
902 void bi_print_shader(bi_context *ctx, FILE *fp);
903 
904 /* BIR passes */
905 
906 void bi_analyze_helper_terminate(bi_context *ctx);
907 void bi_analyze_helper_requirements(bi_context *ctx);
908 void bi_opt_copy_prop(bi_context *ctx);
909 void bi_opt_cse(bi_context *ctx);
910 void bi_opt_mod_prop_forward(bi_context *ctx);
911 void bi_opt_mod_prop_backward(bi_context *ctx);
912 void bi_opt_dead_code_eliminate(bi_context *ctx);
913 void bi_opt_dce_post_ra(bi_context *ctx);
914 void bi_opt_push_ubo(bi_context *ctx);
915 void bi_lower_swizzle(bi_context *ctx);
916 void bi_lower_fau(bi_context *ctx);
917 void bi_assign_scoreboard(bi_context *ctx);
918 void bi_register_allocate(bi_context *ctx);
919 
920 void bi_lower_opt_instruction(bi_instr *I);
921 
922 void bi_schedule(bi_context *ctx);
923 bool bi_can_fma(bi_instr *ins);
924 bool bi_can_add(bi_instr *ins);
925 bool bi_must_message(bi_instr *ins);
926 bool bi_reads_zero(bi_instr *ins);
927 bool bi_reads_temps(bi_instr *ins, unsigned src);
928 bool bi_reads_t(bi_instr *ins, unsigned src);
929 
930 #ifndef NDEBUG
931 bool bi_validate_initialization(bi_context *ctx);
932 void bi_validate(bi_context *ctx, const char *after_str);
933 #else
bi_validate_initialization(UNUSED bi_context * ctx)934 static inline bool bi_validate_initialization(UNUSED bi_context *ctx) { return true; }
bi_validate(UNUSED bi_context * ctx,UNUSED const char * after_str)935 static inline void bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str) { return; }
936 #endif
937 
938 uint32_t bi_fold_constant(bi_instr *I, bool *unsupported);
939 void bi_opt_constant_fold(bi_context *ctx);
940 
941 /* Liveness */
942 
943 void bi_compute_liveness(bi_context *ctx);
944 void bi_liveness_ins_update(uint8_t *live, bi_instr *ins, unsigned max);
945 void bi_invalidate_liveness(bi_context *ctx);
946 
947 void bi_postra_liveness(bi_context *ctx);
948 uint64_t bi_postra_liveness_ins(uint64_t live, bi_instr *ins);
949 
950 /* Layout */
951 
952 signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target);
953 bool bi_ec0_packed(unsigned tuple_count);
954 
955 /* Check if there are no more instructions starting with a given block, this
956  * needs to recurse in case a shader ends with multiple empty blocks */
957 
958 static inline bool
bi_is_terminal_block(bi_block * block)959 bi_is_terminal_block(bi_block *block)
960 {
961         return (block == NULL) ||
962                 (list_is_empty(&block->instructions) &&
963                  bi_is_terminal_block(block->successors[0]) &&
964                  bi_is_terminal_block(block->successors[1]));
965 }
966 
967 /* Code emit */
968 
969 /* Returns the size of the final clause */
970 unsigned bi_pack(bi_context *ctx, struct util_dynarray *emission);
971 
/* A packed tuple carried as two 64-bit words (presumably the low and high
 * halves of the encoding; confirm against the packer) */
struct bi_packed_tuple {
        uint64_t lo;
        uint64_t hi;
};
976 
977 uint8_t bi_pack_literal(enum bi_clause_subword literal);
978 
979 uint8_t
980 bi_pack_upper(enum bi_clause_subword upper,
981                 struct bi_packed_tuple *tuples,
982                 ASSERTED unsigned tuple_count);
983 uint64_t
984 bi_pack_tuple_bits(enum bi_clause_subword idx,
985                 struct bi_packed_tuple *tuples,
986                 ASSERTED unsigned tuple_count,
987                 unsigned offset, unsigned nbits);
988 
989 uint8_t
990 bi_pack_sync(enum bi_clause_subword t1,
991              enum bi_clause_subword t2,
992              enum bi_clause_subword t3,
993              struct bi_packed_tuple *tuples,
994              ASSERTED unsigned tuple_count,
995              bool z);
996 
997 void
998 bi_pack_format(struct util_dynarray *emission,
999                 unsigned index,
1000                 struct bi_packed_tuple *tuples,
1001                 ASSERTED unsigned tuple_count,
1002                 uint64_t header, uint64_t ec0,
1003                 unsigned m0, bool z);
1004 
1005 unsigned bi_pack_fma(bi_instr *I,
1006                 enum bifrost_packed_src src0,
1007                 enum bifrost_packed_src src1,
1008                 enum bifrost_packed_src src2,
1009                 enum bifrost_packed_src src3);
1010 unsigned bi_pack_add(bi_instr *I,
1011                 enum bifrost_packed_src src0,
1012                 enum bifrost_packed_src src1,
1013                 enum bifrost_packed_src src2,
1014                 enum bifrost_packed_src src3);
1015 
/* Like in NIR, for use with the builder: where a cursor sits relative to the
 * block or instruction it refers to */

enum bi_cursor_option {
    bi_cursor_after_block,  /* at the end of a block */
    bi_cursor_before_instr, /* immediately before an instruction */
    bi_cursor_after_instr   /* immediately after an instruction */
};
1023 
1024 typedef struct {
1025     enum bi_cursor_option option;
1026 
1027     union {
1028         bi_block *block;
1029         bi_instr *instr;
1030     };
1031 } bi_cursor;
1032 
1033 static inline bi_cursor
bi_after_block(bi_block * block)1034 bi_after_block(bi_block *block)
1035 {
1036     return (bi_cursor) {
1037         .option = bi_cursor_after_block,
1038         .block = block
1039     };
1040 }
1041 
1042 static inline bi_cursor
bi_before_instr(bi_instr * instr)1043 bi_before_instr(bi_instr *instr)
1044 {
1045     return (bi_cursor) {
1046         .option = bi_cursor_before_instr,
1047         .instr = instr
1048     };
1049 }
1050 
1051 static inline bi_cursor
bi_after_instr(bi_instr * instr)1052 bi_after_instr(bi_instr *instr)
1053 {
1054     return (bi_cursor) {
1055         .option = bi_cursor_after_instr,
1056         .instr = instr
1057     };
1058 }
1059 
1060 /* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause,
1061  * in which case there must exist a nonempty penultimate tuple */
1062 
1063 ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_tuple(bi_tuple * tuple)1064 bi_first_instr_in_tuple(bi_tuple *tuple)
1065 {
1066         bi_instr *instr = tuple->fma ?: tuple->add;
1067         assert(instr != NULL);
1068         return instr;
1069 }
1070 
1071 ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_clause(bi_clause * clause)1072 bi_first_instr_in_clause(bi_clause *clause)
1073 {
1074         return bi_first_instr_in_tuple(&clause->tuples[0]);
1075 }
1076 
1077 ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_last_instr_in_clause(bi_clause * clause)1078 bi_last_instr_in_clause(bi_clause *clause)
1079 {
1080         bi_tuple tuple = clause->tuples[clause->tuple_count - 1];
1081         bi_instr *instr = tuple.add ?: tuple.fma;
1082 
1083         if (!instr) {
1084                 assert(clause->tuple_count >= 2);
1085                 tuple = clause->tuples[clause->tuple_count - 2];
1086                 instr = tuple.add ?: tuple.fma;
1087         }
1088 
1089         assert(instr != NULL);
1090         return instr;
1091 }
1092 
/* Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start
 * (end) of the clause and adding a condition for the clause boundary.
 *
 * Forward walk: `pos` starts at the clause's first instruction and advances
 * through link.next until it reaches either the block's list head or the
 * instruction following the clause's last one. The end bound is re-evaluated
 * on every iteration.
 *
 * The start value is the bi_instr pointer itself. It must not be passed
 * through LIST_ENTRY, which expects a pointer to the embedded list node and
 * would only be correct while `link` is at offset 0 of bi_instr. */

#define bi_foreach_instr_in_clause(block, clause, pos) \
   for (bi_instr *pos = bi_first_instr_in_clause(clause); \
        (&pos->link != &(block)->instructions) \
                && (pos != bi_next_op(bi_last_instr_in_clause(clause))); \
        pos = LIST_ENTRY(bi_instr, pos->link.next, link))
1101 
/* Reverse walk over a clause: `pos` starts at the clause's last instruction
 * and steps through link.prev until it reaches either the block's list head
 * or the instruction preceding the clause's first one. The end bound is
 * re-evaluated on every iteration.
 *
 * The start value is the bi_instr pointer itself. It must not be passed
 * through LIST_ENTRY, which expects a pointer to the embedded list node and
 * would only be correct while `link` is at offset 0 of bi_instr. */
#define bi_foreach_instr_in_clause_rev(block, clause, pos) \
   for (bi_instr *pos = bi_last_instr_in_clause(clause); \
        (&pos->link != &(block)->instructions) \
                && (pos != bi_prev_op(bi_first_instr_in_clause(clause))); \
        pos = LIST_ENTRY(bi_instr, pos->link.prev, link))
1107 
1108 static inline bi_cursor
bi_before_clause(bi_clause * clause)1109 bi_before_clause(bi_clause *clause)
1110 {
1111     return bi_before_instr(bi_first_instr_in_clause(clause));
1112 }
1113 
1114 static inline bi_cursor
bi_before_tuple(bi_tuple * tuple)1115 bi_before_tuple(bi_tuple *tuple)
1116 {
1117     return bi_before_instr(bi_first_instr_in_tuple(tuple));
1118 }
1119 
1120 static inline bi_cursor
bi_after_clause(bi_clause * clause)1121 bi_after_clause(bi_clause *clause)
1122 {
1123     return bi_after_instr(bi_last_instr_in_clause(clause));
1124 }
1125 
1126 /* IR builder in terms of cursor infrastructure */
1127 
1128 typedef struct {
1129     bi_context *shader;
1130     bi_cursor cursor;
1131 } bi_builder;
1132 
1133 static inline bi_builder
bi_init_builder(bi_context * ctx,bi_cursor cursor)1134 bi_init_builder(bi_context *ctx, bi_cursor cursor)
1135 {
1136         return (bi_builder) {
1137                 .shader = ctx,
1138                 .cursor = cursor
1139         };
1140 }
1141 
1142 /* Insert an instruction at the cursor and move the cursor */
1143 
1144 static inline void
bi_builder_insert(bi_cursor * cursor,bi_instr * I)1145 bi_builder_insert(bi_cursor *cursor, bi_instr *I)
1146 {
1147     switch (cursor->option) {
1148     case bi_cursor_after_instr:
1149         list_add(&I->link, &cursor->instr->link);
1150         cursor->instr = I;
1151         return;
1152 
1153     case bi_cursor_after_block:
1154         list_addtail(&I->link, &cursor->block->instructions);
1155         cursor->option = bi_cursor_after_instr;
1156         cursor->instr = I;
1157         return;
1158 
1159     case bi_cursor_before_instr:
1160         list_addtail(&I->link, &cursor->instr->link);
1161         cursor->option = bi_cursor_after_instr;
1162         cursor->instr = I;
1163         return;
1164     }
1165 
1166     unreachable("Invalid cursor option");
1167 }
1168 
1169 static inline unsigned
bi_word_node(bi_index idx)1170 bi_word_node(bi_index idx)
1171 {
1172         assert(idx.type == BI_INDEX_NORMAL && !idx.reg);
1173         return (idx.value << 2) | idx.offset;
1174 }
1175 
1176 /* NIR passes */
1177 
1178 bool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes);
1179 
1180 #endif
1181