1 /*
2  * Copyright (C) 2020 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef __PAN_IR_H
25 #define __PAN_IR_H
26 
27 #include <stdint.h>
28 #include "compiler/nir/nir.h"
29 #include "util/u_dynarray.h"
30 #include "util/hash_table.h"
31 
/* Indices for named (non-XFB) varyings that are present. These are packed
 * tightly so they correspond to a bitfield present (P) indexed by (1 <<
 * PAN_VARY_*). This has the nice property that you can look up the buffer
 * index of a given special field given a shift S by:
 *
 *      idx = popcount(P & ((1 << S) - 1))
 *
 * That is, look at all of the present varyings that come earlier and count
 * them; since indices start at zero, that count is the index of the field
 * itself. Likewise, the total number of special buffers required is simply
 * popcount(P).
 */
43 
enum pan_special_varying {
        PAN_VARY_GENERAL = 0,   /* 0 */
        PAN_VARY_POSITION,      /* 1 */
        PAN_VARY_PSIZ,          /* 2 */
        PAN_VARY_PNTCOORD,      /* 3 */
        PAN_VARY_FACE,          /* 4 */
        PAN_VARY_FRAGCOORD,     /* 5 */

        /* Number of entries above; must stay the final enumerator */
        PAN_VARY_MAX,
};

/* Upper bound on the attribute descriptors needed for varyings: up to
 * MAX_VARYING source-level varyings, plus one descriptor for each special
 * varying other than PAN_VARY_GENERAL. */
#define PAN_MAX_VARYINGS (MAX_VARYING + (PAN_VARY_MAX - 1))
60 
61 /* Define the general compiler entry point */
62 
/* Capacity of the sysval table (see struct panfrost_sysvals) */
#define MAX_SYSVAL_COUNT 32

/* A sysval packs two fields: its class (one of the PAN_SYSVAL_* values) in
 * the low 16 bits and a per-class ID in the bits above. A sysval that takes
 * no parameter uses ID 0, so it compares equal to its bare class value. */

#define PAN_SYSVAL(type, no) (PAN_SYSVAL_##type | ((no) << 16))
#define PAN_SYSVAL_TYPE(sysval) ((sysval) & ((1 << 16) - 1))
#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16)
71 
/* Sysval classes shared by the compilers. Numbering begins at one (zero is
 * never a valid class), which makes the values convenient as keys in the
 * compiler-internal hash tables. Value 6 is currently unassigned. */

enum {
        PAN_SYSVAL_VIEWPORT_SCALE          = 1,
        PAN_SYSVAL_VIEWPORT_OFFSET         = 2,
        PAN_SYSVAL_TEXTURE_SIZE            = 3,
        PAN_SYSVAL_SSBO                    = 4,
        PAN_SYSVAL_NUM_WORK_GROUPS         = 5,
        /* 6 is a gap in the numbering */
        PAN_SYSVAL_SAMPLER                 = 7,
        PAN_SYSVAL_LOCAL_GROUP_SIZE        = 8,
        PAN_SYSVAL_WORK_DIM                = 9,
        PAN_SYSVAL_IMAGE_SIZE              = 10,
        PAN_SYSVAL_SAMPLE_POSITIONS        = 11,
        PAN_SYSVAL_MULTISAMPLED            = 12,
        PAN_SYSVAL_RT_CONVERSION           = 13,
        PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14,
        PAN_SYSVAL_DRAWID                  = 15,
        PAN_SYSVAL_BLEND_CONSTANTS         = 16,
};
92 
/* Packing of the per-class ID for PAN_SYSVAL_TEXTURE_SIZE-style sysvals:
 *
 *      bits 0..6   texture index
 *      bits 7..8   dimension
 *      bit  9      set if the texture is an array
 *
 * PAN_TXS_SYSVAL_ID does not mask its inputs, so texidx must fit in 7 bits
 * and dim in 2 bits for the fields not to overlap. */

#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array)          \
        ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))

/* Accessors for the fields above. Every expansion is fully parenthesized so
 * the macros compose safely inside larger expressions. */
#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id)        ((id) & 0x7f)
#define PAN_SYSVAL_ID_TO_TXS_DIM(id)            (((id) >> 7) & 0x3)
#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id)       (!!((id) & (1 << 9)))
99 
/* Attribute slots reserved for the vertex builtins. The particular numbers are
 * somewhat arbitrary, but they match the blob so traces are easier to
 * compare. */

enum {
        PAN_VERTEX_ID   = 16,
        PAN_INSTANCE_ID = 17,

        /* One past the last reserved slot */
        PAN_MAX_ATTRIBUTE
};
108 
109 struct panfrost_sysvals {
110         /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
111         unsigned sysvals[MAX_SYSVAL_COUNT];
112         unsigned sysval_count;
113 };
114 
/* Maximum number of pushed words. Technically Midgard could go up to 92 in a
 * pathological case, but we don't take advantage of that. Likewise, Bifrost's
 * FAU encoding can address 128 words, but actual implementations (G72, G76)
 * are capped at 64. */

#define PAN_MAX_PUSH 64

/* One pushed word, identified by the UBO it comes from and its offset.
 *
 * Architectural invariants (Midgard and Bifrost): a UBO must be <= 2^16 bytes,
 * so an offset to a word must be < 2^16. There are fewer than 2^8 UBOs. */

struct panfrost_ubo_word {
        uint16_t ubo;
        uint16_t offset;
};

/* List of pushed words: the first `count` entries of `words` are in use */

struct panfrost_ubo_push {
        unsigned count;
        struct panfrost_ubo_word words[PAN_MAX_PUSH];
};
133 
134 /* Helper for searching the above. Note this is O(N) to the number of pushed
135  * constants, do not run in the draw call hot path */
136 
137 unsigned
138 pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs);
139 
140 struct hash_table_u64 *
141 panfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx);
142 
143 unsigned
144 pan_lookup_sysval(struct hash_table_u64 *sysval_to_id,
145                   struct panfrost_sysvals *sysvals,
146                   int sysval);
147 
148 int
149 panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);
150 
151 struct panfrost_compile_inputs {
152         unsigned gpu_id;
153         bool is_blend, is_blit;
154         struct {
155                 unsigned rt;
156                 unsigned nr_samples;
157                 uint64_t bifrost_blend_desc;
158         } blend;
159         unsigned sysval_ubo;
160         bool shaderdb;
161         bool no_ubo_to_push;
162 
163         enum pipe_format rt_formats[8];
164         uint8_t raw_fmt_mask;
165         unsigned nr_cbufs;
166 
167         union {
168                 struct {
169                         bool static_rt_conv;
170                         uint32_t rt_conv[8];
171                 } bifrost;
172         };
173 };
174 
175 struct pan_shader_varying {
176         gl_varying_slot location;
177         enum pipe_format format;
178 };
179 
180 struct bifrost_shader_blend_info {
181         nir_alu_type type;
182         uint32_t return_offset;
183 
184         /* mali_bifrost_register_file_format corresponding to nir_alu_type */
185         unsigned format;
186 };
187 
188 struct bifrost_shader_info {
189         struct bifrost_shader_blend_info blend[8];
190         nir_alu_type blend_src1_type;
191         bool wait_6, wait_7;
192 
193         /* Packed, preloaded message descriptors */
194         uint16_t messages[2];
195 };
196 
/* Midgard-specific part of struct pan_shader_info */
struct midgard_shader_info {
        unsigned first_tag;
};
200 
201 struct pan_shader_info {
202         gl_shader_stage stage;
203         unsigned work_reg_count;
204         unsigned tls_size;
205         unsigned wls_size;
206 
207         union {
208                 struct {
209                         bool reads_frag_coord;
210                         bool reads_point_coord;
211                         bool reads_face;
212                         bool helper_invocations;
213                         bool can_discard;
214                         bool writes_depth;
215                         bool writes_stencil;
216                         bool writes_coverage;
217                         bool sidefx;
218                         bool reads_sample_id;
219                         bool reads_sample_pos;
220                         bool reads_sample_mask_in;
221                         bool reads_helper_invocation;
222                         bool sample_shading;
223                         bool early_fragment_tests;
224                         bool can_early_z, can_fpk;
225                         BITSET_WORD outputs_read;
226                         BITSET_WORD outputs_written;
227                 } fs;
228 
229                 struct {
230                         bool writes_point_size;
231                 } vs;
232         };
233 
234         bool separable;
235         bool contains_barrier;
236         bool writes_global;
237         uint64_t outputs_written;
238 
239         unsigned sampler_count;
240         unsigned texture_count;
241         unsigned ubo_count;
242         unsigned attribute_count;
243 
244         struct {
245                 unsigned input_count;
246                 struct pan_shader_varying input[PAN_MAX_VARYINGS];
247                 unsigned output_count;
248                 struct pan_shader_varying output[PAN_MAX_VARYINGS];
249         } varyings;
250 
251         struct panfrost_sysvals sysvals;
252 
253         /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
254          * Uniforms (Bifrost) */
255         struct panfrost_ubo_push push;
256 
257         uint32_t ubo_mask;
258 
259         union {
260                 struct bifrost_shader_info bifrost;
261                 struct midgard_shader_info midgard;
262         };
263 };
264 
265 typedef struct pan_block {
266         /* Link to next block. Must be first for mir_get_block */
267         struct list_head link;
268 
269         /* List of instructions emitted for the current block */
270         struct list_head instructions;
271 
272         /* Index of the block in source order */
273         unsigned name;
274 
275         /* Control flow graph */
276         struct pan_block *successors[2];
277         struct set *predecessors;
278         bool unconditional_jumps;
279 
280         /* In liveness analysis, these are live masks (per-component) for
281          * indices for the block. Scalar compilers have the luxury of using
282          * simple bit fields, but for us, liveness is a vector idea. */
283         uint16_t *live_in;
284         uint16_t *live_out;
285 } pan_block;
286 
287 struct pan_instruction {
288         struct list_head link;
289 };
290 
/* Iterates over the instructions of `block` in reverse order, binding each one
 * to `v` as a struct pan_instruction pointer. `block` is parenthesized in the
 * expansion so that any pointer-valued expression may be passed. */
#define pan_foreach_instr_in_block_rev(block, v) \
        list_for_each_entry_rev(struct pan_instruction, v, &(block)->instructions, link)
293 
/* Iterates over the successors of `blk`, binding each to `v`. Iteration stops
 * at the first NULL successor or after both slots have been visited.
 *
 * The macro declares `v` and a cursor `_v` in the enclosing scope, so it must
 * be used where declarations are allowed and at most once per scope.
 *
 * The bounds check runs before the slot is read, so the cursor is never
 * dereferenced once it points one past the end of successors[]. */
#define pan_foreach_successor(blk, v) \
        pan_block *v; \
        pan_block **_v; \
        for (_v = (pan_block **) &(blk)->successors[0]; \
                _v < (pan_block **) &(blk)->successors[2] && \
                        (v = *_v) != NULL; \
                _v++)
301 
/* Iterates over the predecessor set of `blk`, binding each key to `v` as a
 * struct pan_block pointer. The set is walked with _mesa_set_next_entry until
 * it returns NULL.
 *
 * The macro declares `v` and `_entry_<v>` in the enclosing scope, so it must
 * be used where declarations are allowed and at most once per scope for a
 * given name. `blk` is parenthesized in the expansion so that any
 * pointer-valued expression may be passed. */
#define pan_foreach_predecessor(blk, v) \
        struct set_entry *_entry_##v; \
        struct pan_block *v; \
        for (_entry_##v = _mesa_set_next_entry((blk)->predecessors, NULL), \
                v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL);  \
                _entry_##v != NULL; \
                _entry_##v = _mesa_set_next_entry((blk)->predecessors, _entry_##v), \
                v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL))
310 
311 static inline pan_block *
pan_exit_block(struct list_head * blocks)312 pan_exit_block(struct list_head *blocks)
313 {
314         pan_block *last = list_last_entry(blocks, pan_block, link);
315         assert(!last->successors[0] && !last->successors[1]);
316         return last;
317 }
318 
319 typedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max);
320 
321 void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
322 void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
323 bool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max);
324 
325 void pan_compute_liveness(struct list_head *blocks,
326                 unsigned temp_count,
327                 pan_liveness_update callback);
328 
329 void pan_free_liveness(struct list_head *blocks);
330 
331 uint16_t
332 pan_to_bytemask(unsigned bytes, unsigned mask);
333 
334 void pan_block_add_successor(pan_block *block, pan_block *successor);
335 
336 /* IR indexing */
337 #define PAN_IS_REG (1)
338 
339 static inline unsigned
pan_ssa_index(nir_ssa_def * ssa)340 pan_ssa_index(nir_ssa_def *ssa)
341 {
342         /* Off-by-one ensures BIR_NO_ARG is skipped */
343         return ((ssa->index + 1) << 1) | 0;
344 }
345 
346 static inline unsigned
pan_src_index(nir_src * src)347 pan_src_index(nir_src *src)
348 {
349         if (src->is_ssa)
350                 return pan_ssa_index(src->ssa);
351         else {
352                 assert(!src->reg.indirect);
353                 return (src->reg.reg->index << 1) | PAN_IS_REG;
354         }
355 }
356 
357 static inline unsigned
pan_dest_index(nir_dest * dst)358 pan_dest_index(nir_dest *dst)
359 {
360         if (dst->is_ssa)
361                 return pan_ssa_index(&dst->ssa);
362         else {
363                 assert(!dst->reg.indirect);
364                 return (dst->reg.reg->index << 1) | PAN_IS_REG;
365         }
366 }
367 
368 /* IR printing helpers */
369 void pan_print_alu_type(nir_alu_type t, FILE *fp);
370 
371 /* Until it can be upstreamed.. */
372 bool pan_has_source_mod(nir_alu_src *src, nir_op op);
373 bool pan_has_dest_mod(nir_dest **dest, nir_op op);
374 
/* NIR passes to do some backend-specific lowering */

/* Writeout flags; each occupies its own bit so they can be OR'd together */
#define PAN_WRITEOUT_C (1 << 0)
#define PAN_WRITEOUT_Z (1 << 1)
#define PAN_WRITEOUT_S (1 << 2)
380 
381 bool pan_nir_reorder_writeout(nir_shader *nir);
382 bool pan_nir_lower_zs_store(nir_shader *nir);
383 
384 bool pan_nir_lower_64bit_intrin(nir_shader *shader);
385 
386 bool pan_lower_helper_invocation(nir_shader *shader);
387 bool pan_lower_sample_pos(nir_shader *shader);
388 
389 #endif
390