1 /*
2  * Copyright (c) 2017 Lima Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include "util/u_math.h"
26 #include "util/ralloc.h"
27 
28 #include "gpir.h"
29 
30 const gpir_op_info gpir_op_infos[] = {
31    [gpir_op_mov] = {
32       .name = "mov",
33       .slots = (int []) {
34          GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1,
35          GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0,
36          GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_COMPLEX,
37          GPIR_INSTR_SLOT_END
38       },
39    },
40    [gpir_op_mul] = {
41       .name = "mul",
42       .dest_neg = true,
43       .slots = (int []) { GPIR_INSTR_SLOT_MUL1, GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
44    },
45    [gpir_op_select] = {
46       .name = "select",
47       .dest_neg = true,
48       .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
49       .may_consume_two_slots = true,
50    },
51    [gpir_op_complex1] = {
52       .name = "complex1",
53       .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
54       .spillless = true,
55       .may_consume_two_slots = true,
56    },
57    [gpir_op_complex2] = {
58       .name = "complex2",
59       .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
60       .spillless = true,
61       .schedule_first = true,
62    },
63    [gpir_op_add] = {
64       .name = "add",
65       .src_neg = {true, true, false, false},
66       .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
67    },
68    [gpir_op_floor] = {
69       .name = "floor",
70       .src_neg = {true, false, false, false},
71       .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
72       .spillless = true,
73       .may_consume_two_slots = true,
74    },
75    [gpir_op_sign] = {
76       .name = "sign",
77       .src_neg = {true, false, false, false},
78       .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
79       .spillless = true,
80       .may_consume_two_slots = true,
81    },
82    [gpir_op_ge] = {
83       .name = "ge",
84       .src_neg = {true, true, false, false},
85       .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
86       .spillless = true,
87       .may_consume_two_slots = true,
88    },
89    [gpir_op_lt] = {
90       .name = "lt",
91       .src_neg = {true, true, false, false},
92       .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
93       .spillless = true,
94       .may_consume_two_slots = true,
95    },
96    [gpir_op_min] = {
97       .name = "min",
98       .src_neg = {true, true, false, false},
99       .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
100       .spillless = true,
101       .may_consume_two_slots = true,
102    },
103    [gpir_op_max] = {
104       .name = "max",
105       .src_neg = {true, true, false, false},
106       .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
107       .spillless = true,
108       .may_consume_two_slots = true,
109    },
110    [gpir_op_abs] = {
111       .name = "abs",
112       .src_neg = {true, true, false, false},
113    },
114    [gpir_op_neg] = {
115       .name = "neg",
116       .slots = (int []) {
117          GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1,
118          GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0,
119          GPIR_INSTR_SLOT_END
120       },
121    },
122    [gpir_op_not] = {
123       .name = "not",
124       .src_neg = {true, true, false, false},
125       .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
126    },
127    [gpir_op_eq] = {
128       .name = "eq",
129       .slots = (int []) {
130          GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END
131       },
132    },
133    [gpir_op_ne] = {
134       .name = "ne",
135       .slots = (int []) {
136          GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END
137       },
138    },
139    [gpir_op_clamp_const] = {
140       .name = "clamp_const",
141    },
142    [gpir_op_preexp2] = {
143       .name = "preexp2",
144       .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
145       .spillless = true,
146       .schedule_first = true,
147    },
148    [gpir_op_postlog2] = {
149       .name = "postlog2",
150       .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
151    },
152    [gpir_op_exp2_impl] = {
153       .name = "exp2_impl",
154       .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
155       .spillless = true,
156       .schedule_first = true,
157    },
158    [gpir_op_log2_impl] = {
159       .name = "log2_impl",
160       .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
161       .spillless = true,
162       .schedule_first = true,
163    },
164    [gpir_op_rcp_impl] = {
165       .name = "rcp_impl",
166       .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
167       .spillless = true,
168       .schedule_first = true,
169    },
170    [gpir_op_rsqrt_impl] = {
171       .name = "rsqrt_impl",
172       .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
173       .spillless = true,
174       .schedule_first = true,
175    },
176    [gpir_op_load_uniform] = {
177       .name = "ld_uni",
178       .slots = (int []) {
179          GPIR_INSTR_SLOT_MEM_LOAD0, GPIR_INSTR_SLOT_MEM_LOAD1,
180          GPIR_INSTR_SLOT_MEM_LOAD2, GPIR_INSTR_SLOT_MEM_LOAD3,
181          GPIR_INSTR_SLOT_END
182       },
183       .type = gpir_node_type_load,
184    },
185    [gpir_op_load_temp] = {
186       .name = "ld_tmp",
187       .type = gpir_node_type_load,
188    },
189    [gpir_op_load_attribute] = {
190       .name = "ld_att",
191       .slots = (int []) {
192          GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1,
193          GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3,
194          GPIR_INSTR_SLOT_END
195       },
196       .type = gpir_node_type_load,
197    },
198    [gpir_op_load_reg] = {
199       .name = "ld_reg",
200       .slots = (int []) {
201          GPIR_INSTR_SLOT_REG1_LOAD0, GPIR_INSTR_SLOT_REG1_LOAD1,
202          GPIR_INSTR_SLOT_REG1_LOAD2, GPIR_INSTR_SLOT_REG1_LOAD3,
203          GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1,
204          GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3,
205          GPIR_INSTR_SLOT_END
206       },
207       .type = gpir_node_type_load,
208       .spillless = true,
209    },
210    [gpir_op_store_temp] = {
211       .name = "st_tmp",
212       .type = gpir_node_type_store,
213    },
214    [gpir_op_store_reg] = {
215       .name = "st_reg",
216       .slots = (int []) {
217          GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1,
218          GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3,
219          GPIR_INSTR_SLOT_END
220       },
221       .type = gpir_node_type_store,
222       .spillless = true,
223    },
224    [gpir_op_store_varying] = {
225       .name = "st_var",
226       .slots = (int []) {
227          GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1,
228          GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3,
229          GPIR_INSTR_SLOT_END
230       },
231       .type = gpir_node_type_store,
232       .spillless = true,
233    },
234    [gpir_op_store_temp_load_off0] = {
235       .name = "st_of0",
236       .type = gpir_node_type_store,
237    },
238    [gpir_op_store_temp_load_off1] = {
239       .name = "st_of1",
240       .type = gpir_node_type_store,
241    },
242    [gpir_op_store_temp_load_off2] = {
243       .name = "st_of2",
244       .type = gpir_node_type_store,
245    },
246    [gpir_op_branch_cond] = {
247       .name = "branch_cond",
248       .type = gpir_node_type_branch,
249       .schedule_first = true,
250       .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
251    },
252    [gpir_op_const] = {
253       .name = "const",
254       .type = gpir_node_type_const,
255    },
256    [gpir_op_exp2] = {
257       .name = "exp2",
258    },
259    [gpir_op_log2] = {
260       .name = "log2",
261    },
262    [gpir_op_rcp] = {
263       .name = "rcp",
264    },
265    [gpir_op_rsqrt] = {
266       .name = "rsqrt",
267    },
268    [gpir_op_ceil] = {
269       .name = "ceil",
270    },
271    [gpir_op_exp] = {
272       .name = "exp",
273    },
274    [gpir_op_log] = {
275       .name = "log",
276    },
277    [gpir_op_sin] = {
278       .name = "sin",
279    },
280    [gpir_op_cos] = {
281       .name = "cos",
282    },
283    [gpir_op_tan] = {
284       .name = "tan",
285    },
286    [gpir_op_dummy_f] = {
287       .name = "dummy_f",
288       .type = gpir_node_type_alu,
289       .spillless = true,
290    },
291    [gpir_op_dummy_m] = {
292       .name = "dummy_m",
293       .type = gpir_node_type_alu,
294    },
295    [gpir_op_branch_uncond] = {
296       .name = "branch_uncond",
297       .type = gpir_node_type_branch,
298    },
299 };
300 
gpir_node_create(gpir_block * block,gpir_op op)301 void *gpir_node_create(gpir_block *block, gpir_op op)
302 {
303    static const int node_size[] = {
304       [gpir_node_type_alu] = sizeof(gpir_alu_node),
305       [gpir_node_type_const] = sizeof(gpir_const_node),
306       [gpir_node_type_load] = sizeof(gpir_load_node),
307       [gpir_node_type_store] = sizeof(gpir_store_node),
308       [gpir_node_type_branch] = sizeof(gpir_branch_node),
309    };
310 
311    gpir_node_type type = gpir_op_infos[op].type;
312    int size = node_size[type];
313    gpir_node *node = rzalloc_size(block, size);
314    if (unlikely(!node))
315       return NULL;
316 
317    snprintf(node->name, sizeof(node->name), "new");
318 
319    list_inithead(&node->succ_list);
320    list_inithead(&node->pred_list);
321 
322    node->op = op;
323    node->type = type;
324    node->index = block->comp->cur_index++;
325    node->block = block;
326 
327    return node;
328 }
329 
gpir_node_add_dep(gpir_node * succ,gpir_node * pred,int type)330 gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type)
331 {
332    /* don't add dep for two nodes from different block */
333    if (succ->block != pred->block)
334       return NULL;
335 
336    /* don't add self loop dep */
337    if (succ == pred)
338       return NULL;
339 
340    /* don't add duplicated dep */
341    gpir_node_foreach_pred(succ, dep) {
342       if (dep->pred == pred) {
343          /* use stronger dependency */
344          if (dep->type > type)
345             dep->type = type;
346          return dep;
347       }
348    }
349 
350    gpir_dep *dep = ralloc(succ, gpir_dep);
351    dep->type = type;
352    dep->pred = pred;
353    dep->succ = succ;
354    list_addtail(&dep->pred_link, &succ->pred_list);
355    list_addtail(&dep->succ_link, &pred->succ_list);
356    return dep;
357 }
358 
gpir_node_remove_dep(gpir_node * succ,gpir_node * pred)359 void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred)
360 {
361    gpir_node_foreach_pred(succ, dep) {
362       if (dep->pred == pred) {
363          list_del(&dep->succ_link);
364          list_del(&dep->pred_link);
365          ralloc_free(dep);
366          return;
367       }
368    }
369 }
370 
gpir_node_replace_child(gpir_node * parent,gpir_node * old_child,gpir_node * new_child)371 void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child,
372                              gpir_node *new_child)
373 {
374    if (parent->type == gpir_node_type_alu) {
375       gpir_alu_node *alu = gpir_node_to_alu(parent);
376       for (int i = 0; i < alu->num_child; i++) {
377          if (alu->children[i] == old_child)
378             alu->children[i] = new_child;
379       }
380    }
381    else if (parent->type == gpir_node_type_store) {
382       gpir_store_node *store = gpir_node_to_store(parent);
383       if (store->child == old_child)
384          store->child = new_child;
385    } else if (parent->type == gpir_node_type_branch) {
386       gpir_branch_node *branch = gpir_node_to_branch(parent);
387       if (branch->cond == old_child)
388          branch->cond = new_child;
389    }
390 }
391 
gpir_node_replace_pred(gpir_dep * dep,gpir_node * new_pred)392 void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred)
393 {
394    list_del(&dep->succ_link);
395    dep->pred = new_pred;
396    list_addtail(&dep->succ_link, &new_pred->succ_list);
397 }
398 
gpir_node_replace_succ(gpir_node * dst,gpir_node * src)399 void gpir_node_replace_succ(gpir_node *dst, gpir_node *src)
400 {
401    gpir_node_foreach_succ_safe(src, dep) {
402       if (dep->type != GPIR_DEP_INPUT)
403          continue;
404 
405       gpir_node_replace_pred(dep, dst);
406       gpir_node_replace_child(dep->succ, src, dst);
407    }
408 }
409 
gpir_node_insert_child(gpir_node * parent,gpir_node * child,gpir_node * insert_child)410 void gpir_node_insert_child(gpir_node *parent, gpir_node *child,
411                             gpir_node *insert_child)
412 {
413    gpir_node_foreach_pred(parent, dep) {
414       if (dep->pred == child) {
415          gpir_node_replace_pred(dep, insert_child);
416          gpir_node_replace_child(parent, child, insert_child);
417          break;
418       }
419    }
420 }
421 
gpir_node_delete(gpir_node * node)422 void gpir_node_delete(gpir_node *node)
423 {
424    gpir_node_foreach_succ_safe(node, dep) {
425       list_del(&dep->succ_link);
426       list_del(&dep->pred_link);
427       ralloc_free(dep);
428    }
429 
430    gpir_node_foreach_pred_safe(node, dep) {
431       list_del(&dep->succ_link);
432       list_del(&dep->pred_link);
433       ralloc_free(dep);
434    }
435 
436    list_del(&node->list);
437    ralloc_free(node);
438 }
439 
gpir_node_print_node(gpir_node * node,int type,int space)440 static void gpir_node_print_node(gpir_node *node, int type, int space)
441 {
442    static char *dep_name[] = {
443       [GPIR_DEP_INPUT] = "input",
444       [GPIR_DEP_OFFSET] = "offset",
445       [GPIR_DEP_READ_AFTER_WRITE] = "RaW",
446       [GPIR_DEP_WRITE_AFTER_READ] = "WaR",
447    };
448 
449    for (int i = 0; i < space; i++)
450       printf(" ");
451    printf("%s%s %d %s %s\n", node->printed && !gpir_node_is_leaf(node) ? "+" : "",
452           gpir_op_infos[node->op].name, node->index, node->name, dep_name[type]);
453 
454    if (!node->printed) {
455       gpir_node_foreach_pred(node, dep) {
456          gpir_node_print_node(dep->pred, dep->type, space + 2);
457       }
458 
459       node->printed = true;
460    }
461 }
462 
gpir_node_print_prog_dep(gpir_compiler * comp)463 void gpir_node_print_prog_dep(gpir_compiler *comp)
464 {
465    if (!(lima_debug & LIMA_DEBUG_GP))
466       return;
467 
468    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
469       list_for_each_entry(gpir_node, node, &block->node_list, list) {
470          node->printed = false;
471       }
472    }
473 
474    printf("======== node prog dep ========\n");
475    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
476       list_for_each_entry(gpir_node, node, &block->node_list, list) {
477          if (gpir_node_is_root(node))
478             gpir_node_print_node(node, GPIR_DEP_INPUT, 0);
479       }
480       printf("----------------------------\n");
481    }
482 }
483 
gpir_node_print_prog_seq(gpir_compiler * comp)484 void gpir_node_print_prog_seq(gpir_compiler *comp)
485 {
486    if (!(lima_debug & LIMA_DEBUG_GP))
487       return;
488 
489    int index = 0;
490    printf("======== node prog seq ========\n");
491    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
492       list_for_each_entry(gpir_node, node, &block->node_list, list) {
493          printf("%03d: %s %d %s pred", index++, gpir_op_infos[node->op].name,
494                 node->index, node->name);
495          gpir_node_foreach_pred(node, dep) {
496             printf(" %d", dep->pred->index);
497          }
498          printf(" succ");
499          gpir_node_foreach_succ(node, dep) {
500             printf(" %d", dep->succ->index);
501          }
502          printf("\n");
503       }
504       printf("----------------------------\n");
505    }
506 }
507