1 /*
2  * Copyright (c) 2017 Lima Project
3  * Copyright (c) 2013 Connor Abbott
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a copy
6  * of this software and associated documentation files (the "Software"), to deal
7  * in the Software without restriction, including without limitation the rights
8  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9  * copies of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21  * THE SOFTWARE.
22  *
23  */
24 
25 #ifndef LIMA_IR_GP_GPIR_H
26 #define LIMA_IR_GP_GPIR_H
27 
28 #include "util/list.h"
29 #include "util/u_math.h"
30 
31 #include "ir/lima_ir.h"
32 
33 /* list of operations that a node can do. */
34 typedef enum {
35    gpir_op_mov,
36 
37    /* mul ops */
38    gpir_op_mul,
39    gpir_op_select,
40    gpir_op_complex1,
41    gpir_op_complex2,
42 
43    /* add ops */
44    gpir_op_add,
45    gpir_op_floor,
46    gpir_op_sign,
47    gpir_op_ge,
48    gpir_op_lt,
49    gpir_op_min,
50    gpir_op_max,
51    gpir_op_abs,
52    gpir_op_not,
53 
54    /* mul/add ops */
55    gpir_op_neg,
56 
57    /* passthrough ops */
58    gpir_op_clamp_const,
59    gpir_op_preexp2,
60    gpir_op_postlog2,
61 
62    /* complex ops */
63    gpir_op_exp2_impl,
64    gpir_op_log2_impl,
65    gpir_op_rcp_impl,
66    gpir_op_rsqrt_impl,
67 
68    /* load/store ops */
69    gpir_op_load_uniform,
70    gpir_op_load_temp,
71    gpir_op_load_attribute,
72    gpir_op_load_reg,
73    gpir_op_store_temp,
74    gpir_op_store_reg,
75    gpir_op_store_varying,
76    gpir_op_store_temp_load_off0,
77    gpir_op_store_temp_load_off1,
78    gpir_op_store_temp_load_off2,
79 
80    /* branch */
81    gpir_op_branch_cond,
82 
83    /* const (emulated) */
84    gpir_op_const,
85 
86    /* emulated ops */
87    gpir_op_exp2,
88    gpir_op_log2,
89    gpir_op_rcp,
90    gpir_op_rsqrt,
91    gpir_op_ceil,
92    gpir_op_exp,
93    gpir_op_log,
94    gpir_op_sin,
95    gpir_op_cos,
96    gpir_op_tan,
97    gpir_op_branch_uncond,
98    gpir_op_eq,
99    gpir_op_ne,
100 
101    /* auxiliary ops */
102    gpir_op_dummy_f,
103    gpir_op_dummy_m,
104 
105    gpir_op_num,
106 } gpir_op;
107 
108 typedef enum {
109    gpir_node_type_alu,
110    gpir_node_type_const,
111    gpir_node_type_load,
112    gpir_node_type_store,
113    gpir_node_type_branch,
114 } gpir_node_type;
115 
116 typedef struct {
117    char *name;
118    bool dest_neg;
119    bool src_neg[4];
120    int *slots;
121    gpir_node_type type;
122    bool spillless;
123    bool schedule_first;
124    bool may_consume_two_slots;
125 } gpir_op_info;
126 
127 extern const gpir_op_info gpir_op_infos[];
128 
129 typedef struct {
130    enum {
131       GPIR_DEP_INPUT,     /* def is the input of use */
132       GPIR_DEP_OFFSET,    /* def is the offset of use (i.e. temp store) */
133       GPIR_DEP_READ_AFTER_WRITE,
134       GPIR_DEP_WRITE_AFTER_READ,
135    } type;
136 
137    /* node execute before succ */
138    struct gpir_node *pred;
139    /* node execute after pred */
140    struct gpir_node *succ;
141 
142    /* for node pred_list */
143    struct list_head pred_link;
144    /* for ndoe succ_list */
145    struct list_head succ_link;
146 } gpir_dep;
147 
148 struct gpir_instr;
149 struct gpir_store_node;
150 
151 typedef struct gpir_node {
152    struct list_head list;
153    gpir_op op;
154    gpir_node_type type;
155    int index;
156    char name[16];
157    bool printed;
158    struct gpir_block *block;
159 
160    /* for nodes relationship */
161    /* for node who uses this node (successor) */
162    struct list_head succ_list;
163    /* for node this node uses (predecessor) */
164    struct list_head pred_list;
165 
166    /* for scheduler and regalloc */
167    int value_reg;
168    union {
169       struct {
170          struct gpir_instr *instr;
171          struct gpir_store_node *physreg_store;
172          int pos;
173          int dist;
174          int index;
175          bool ready;
176          bool inserted;
177          bool max_node, next_max_node;
178          bool complex_allowed;
179       } sched;
180       struct {
181          int parent_index;
182          float reg_pressure;
183          int est;
184          bool scheduled;
185       } rsched;
186       struct {
187          float index;
188          struct gpir_node *last;
189       } vreg;
190       struct {
191          int index;
192       } preg;
193    };
194 } gpir_node;
195 
196 typedef struct {
197    gpir_node node;
198 
199    gpir_node *children[3];
200    bool children_negate[3];
201    int num_child;
202 
203    bool dest_negate;
204 } gpir_alu_node;
205 
206 typedef struct {
207    gpir_node node;
208    union fi value;
209 } gpir_const_node;
210 
211 typedef struct {
212    int index;
213    struct list_head list;
214 } gpir_reg;
215 
216 typedef struct {
217    gpir_node node;
218 
219    unsigned index;
220    unsigned component;
221 
222    gpir_reg *reg;
223    struct list_head reg_link;
224 } gpir_load_node;
225 
226 typedef struct gpir_store_node {
227    gpir_node node;
228 
229    unsigned index;
230    unsigned component;
231    gpir_node *child;
232 
233    gpir_reg *reg;
234 } gpir_store_node;
235 
236 enum gpir_instr_slot {
237    GPIR_INSTR_SLOT_MUL0,
238    GPIR_INSTR_SLOT_MUL1,
239    GPIR_INSTR_SLOT_ADD0,
240    GPIR_INSTR_SLOT_ADD1,
241    GPIR_INSTR_SLOT_PASS,
242    GPIR_INSTR_SLOT_COMPLEX,
243    GPIR_INSTR_SLOT_REG0_LOAD0,
244    GPIR_INSTR_SLOT_REG0_LOAD1,
245    GPIR_INSTR_SLOT_REG0_LOAD2,
246    GPIR_INSTR_SLOT_REG0_LOAD3,
247    GPIR_INSTR_SLOT_REG1_LOAD0,
248    GPIR_INSTR_SLOT_REG1_LOAD1,
249    GPIR_INSTR_SLOT_REG1_LOAD2,
250    GPIR_INSTR_SLOT_REG1_LOAD3,
251    GPIR_INSTR_SLOT_MEM_LOAD0,
252    GPIR_INSTR_SLOT_MEM_LOAD1,
253    GPIR_INSTR_SLOT_MEM_LOAD2,
254    GPIR_INSTR_SLOT_MEM_LOAD3,
255    GPIR_INSTR_SLOT_STORE0,
256    GPIR_INSTR_SLOT_STORE1,
257    GPIR_INSTR_SLOT_STORE2,
258    GPIR_INSTR_SLOT_STORE3,
259    GPIR_INSTR_SLOT_NUM,
260    GPIR_INSTR_SLOT_END,
261    GPIR_INSTR_SLOT_ALU_BEGIN      = GPIR_INSTR_SLOT_MUL0,
262    GPIR_INSTR_SLOT_ALU_END        = GPIR_INSTR_SLOT_COMPLEX,
263    GPIR_INSTR_SLOT_DIST_TWO_BEGIN = GPIR_INSTR_SLOT_MUL0,
264    GPIR_INSTR_SLOT_DIST_TWO_END   = GPIR_INSTR_SLOT_PASS,
265 };
266 
267 typedef struct gpir_instr {
268    int index;
269    struct list_head list;
270 
271    gpir_node *slots[GPIR_INSTR_SLOT_NUM];
272 
273    /* The number of ALU slots free for moves. */
274    int alu_num_slot_free;
275 
276    /* The number of ALU slots free for moves, except for the complex slot. */
277    int alu_non_cplx_slot_free;
278 
279    /* We need to make sure that we can insert moves in the following cases:
280     * (1) There was a use of a value two cycles ago.
281     * (2) There were more than 5 uses of a value 1 cycle ago (or else we can't
282     *     possibly satisfy (1) for the next cycle).
283     * (3) There is a store instruction scheduled, but not its child.
284     *
285     * The complex slot cannot be used for a move in case (1), since it only
286     * has a FIFO depth of 1, but it can be used for (2) as well as (3) as long
287     * as the uses aren't in certain slots. It turns out that we don't have to
288     * worry about nodes that can't use the complex slot for (2), since there
289     * are at most 4 uses 1 cycle ago that can't use the complex slot, but we
290     * do have to worry about (3). This means tracking stores whose children
291     * cannot be in the complex slot. In order to ensure that we have enough
292     * space for all three, we maintain the following invariants:
293     *
294     * (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
295     *       alu_num_slot_needed_by_max +
296     *       max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)
297     * (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +
298     *       alu_num_slot_needed_by_non_cplx_store
299     *
300     * alu_max_allowed_next_max is normally 5 (since there can be at most 5 max
301     * nodes for the next instruction) but when there is a complex1 node in
302     * this instruction it reduces to 4 to reserve a slot for complex2 in the
303     * next instruction.
304     */
305    int alu_num_slot_needed_by_store;
306    int alu_num_slot_needed_by_non_cplx_store;
307    int alu_num_slot_needed_by_max;
308    int alu_num_unscheduled_next_max;
309    int alu_max_allowed_next_max;
310 
311    /* Used to communicate to the scheduler how many slots need to be cleared
312     * up in order to satisfy the invariants.
313     */
314    int slot_difference;
315    int non_cplx_slot_difference;
316 
317    int reg0_use_count;
318    bool reg0_is_attr;
319    int reg0_index;
320 
321    int reg1_use_count;
322    int reg1_index;
323 
324    int mem_use_count;
325    bool mem_is_temp;
326    int mem_index;
327 
328    enum {
329       GPIR_INSTR_STORE_NONE,
330       GPIR_INSTR_STORE_VARYING,
331       GPIR_INSTR_STORE_REG,
332       GPIR_INSTR_STORE_TEMP,
333    } store_content[2];
334    int store_index[2];
335 } gpir_instr;
336 
337 typedef struct gpir_block {
338    struct list_head list;
339    struct list_head node_list;
340    struct list_head instr_list;
341    struct gpir_compiler *comp;
342 
343    struct gpir_block *successors[2];
344    struct list_head predecessors;
345    struct list_head predecessors_node;
346 
347    /* for regalloc */
348 
349    /* The set of live registers, i.e. registers whose value may be used
350     * eventually, at the beginning of the block.
351     */
352    BITSET_WORD *live_in;
353 
354    /* Set of live registers at the end of the block. */
355    BITSET_WORD *live_out;
356 
357    /* Set of registers that may have a value defined at the end of the
358     * block.
359     */
360    BITSET_WORD *def_out;
361 
362    /* After register allocation, the set of live physical registers at the end
363     * of the block. Needed for scheduling.
364     */
365    uint64_t live_out_phys;
366 
367    /* For codegen, the offset in the final program. */
368    unsigned instr_offset;
369 
370    /* for scheduler */
371    union {
372       struct {
373          int instr_index;
374       } sched;
375       struct {
376          int node_index;
377       } rsched;
378    };
379 } gpir_block;
380 
381 typedef struct {
382    gpir_node node;
383    gpir_block *dest;
384    gpir_node *cond;
385 } gpir_branch_node;
386 
387 struct lima_vs_compiled_shader;
388 
389 #define GPIR_VECTOR_SSA_VIEWPORT_SCALE  0
390 #define GPIR_VECTOR_SSA_VIEWPORT_OFFSET 1
391 #define GPIR_VECTOR_SSA_NUM 2
392 
393 typedef struct gpir_compiler {
394    struct list_head block_list;
395    int cur_index;
396 
397    /* Find the gpir node for a given NIR SSA def. */
398    gpir_node **node_for_ssa;
399 
400    /* Find the gpir node for a given NIR register. */
401    gpir_node **node_for_reg;
402 
403    /* Find the gpir register for a given NIR SSA def. */
404    gpir_reg **reg_for_ssa;
405 
406    /* Find the gpir register for a given NIR register. */
407    gpir_reg **reg_for_reg;
408 
409    /* gpir block for NIR block. */
410    gpir_block **blocks;
411 
412    /* for physical reg */
413    struct list_head reg_list;
414    int cur_reg;
415 
416    /* lookup for vector ssa */
417    struct {
418       int ssa;
419       gpir_node *nodes[4];
420    } vector_ssa[GPIR_VECTOR_SSA_NUM];
421 
422    struct lima_vs_compiled_shader *prog;
423    int constant_base;
424 
425    /* shaderdb */
426    int num_instr;
427    int num_loops;
428    int num_spills;
429    int num_fills;
430 } gpir_compiler;
431 
432 #define GPIR_VALUE_REG_NUM 11
433 #define GPIR_PHYSICAL_REG_NUM 64
434 
435 void *gpir_node_create(gpir_block *block, gpir_op op);
436 gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type);
437 void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred);
438 void gpir_node_replace_succ(gpir_node *dst, gpir_node *src);
439 void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred);
440 void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, gpir_node *new_child);
441 void gpir_node_insert_child(gpir_node *parent, gpir_node *child, gpir_node *insert_child);
442 void gpir_node_delete(gpir_node *node);
443 void gpir_node_print_prog_dep(gpir_compiler *comp);
444 void gpir_node_print_prog_seq(gpir_compiler *comp);
445 
446 #define gpir_node_foreach_succ(node, dep) \
447    list_for_each_entry(gpir_dep, dep, &node->succ_list, succ_link)
448 #define gpir_node_foreach_succ_safe(node, dep) \
449    list_for_each_entry_safe(gpir_dep, dep, &node->succ_list, succ_link)
450 #define gpir_node_foreach_pred(node, dep) \
451    list_for_each_entry(gpir_dep, dep, &node->pred_list, pred_link)
452 #define gpir_node_foreach_pred_safe(node, dep) \
453    list_for_each_entry_safe(gpir_dep, dep, &node->pred_list, pred_link)
454 
gpir_node_is_root(gpir_node * node)455 static inline bool gpir_node_is_root(gpir_node *node)
456 {
457    return list_is_empty(&node->succ_list);
458 }
459 
gpir_node_is_leaf(gpir_node * node)460 static inline bool gpir_node_is_leaf(gpir_node *node)
461 {
462    return list_is_empty(&node->pred_list);
463 }
464 
465 #define gpir_node_to_alu(node) ((gpir_alu_node *)(node))
466 #define gpir_node_to_const(node) ((gpir_const_node *)(node))
467 #define gpir_node_to_load(node) ((gpir_load_node *)(node))
468 #define gpir_node_to_store(node) ((gpir_store_node *)(node))
469 #define gpir_node_to_branch(node) ((gpir_branch_node *)(node))
470 
471 gpir_instr *gpir_instr_create(gpir_block *block);
472 bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node);
473 void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node);
474 void gpir_instr_print_prog(gpir_compiler *comp);
475 
476 bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2);
477 
478 bool gpir_optimize(gpir_compiler *comp);
479 bool gpir_pre_rsched_lower_prog(gpir_compiler *comp);
480 bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp);
481 bool gpir_regalloc_prog(gpir_compiler *comp);
482 bool gpir_schedule_prog(gpir_compiler *comp);
483 bool gpir_codegen_prog(gpir_compiler *comp);
484 
485 gpir_reg *gpir_create_reg(gpir_compiler *comp);
486 
487 #endif
488