/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "util/ralloc.h"

#include "gpir.h"
#include "lima_context.h"

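/* Gather every constant used by the program into one table and replace each
 * constant node with a load from that table.
 *
 * The table lives in uniform space starting at comp->constant_base, so the
 * constant at table index i is fetched as a load_uniform from vec4 slot
 * constant_base + i / 4, component i % 4. Constant nodes without any
 * successor are simply deleted.
 */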
static bool gpir_lower_const(gpir_compiler *comp)
{
   int num_constant = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_const) {
            if (gpir_node_is_root(node))
               gpir_node_delete(node);
            else
               num_constant++;
         }
      }
   }

   if (num_constant) {
      union fi *constant = ralloc_array(comp->prog, union fi, num_constant);
      if (!constant)
         return false;

      comp->prog->constant = constant;
      comp->prog->state.constant_size = num_constant * sizeof(union fi);

      int index = 0;
      list_for_each_entry(gpir_block, block, &comp->block_list, list) {
         list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
            if (node->op == gpir_op_const) {
               gpir_const_node *c = gpir_node_to_const(node);

               if (!gpir_node_is_root(node)) {
                  gpir_load_node *load = gpir_node_create(block, gpir_op_load_uniform);
                  if (unlikely(!load))
                     return false;

                  load->index = comp->constant_base + (index >> 2);
                  load->component = index % 4;
                  constant[index++] = c->value;

                  gpir_node_replace_succ(&load->node, node);

                  list_addtail(&load->node.list, &node->list);

                  gpir_debug("lower const create uniform %d for const %d\n",
                             load->node.index, node->index);
               }

               gpir_node_delete(node);
            }
         }
      }
   }

   return true;
}

/* Duplicate a load node so that each of its successors gets its own copy. */
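/* Giving each successor a private copy presumably makes scheduling easier:
 * every load can be placed right next to its single user, instead of the
 * loaded value having to stay live across several instructions.
 */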
static bool gpir_lower_load(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->type == gpir_node_type_load) {
            gpir_load_node *load = gpir_node_to_load(node);

            bool first = true;
            gpir_node_foreach_succ_safe(node, dep) {
               gpir_node *succ = dep->succ;

               if (first) {
                  first = false;
                  continue;
               }

               gpir_node *new = gpir_node_create(succ->block, node->op);
               if (unlikely(!new))
                  return false;
               list_addtail(&new->list, &succ->list);

               gpir_debug("lower load create %d from %d for succ %d\n",
                          new->index, node->index, succ->index);

               gpir_load_node *nload = gpir_node_to_load(new);
               nload->index = load->index;
               nload->component = load->component;
               nload->reg = load->reg;

               gpir_node_replace_pred(dep, new);
               gpir_node_replace_child(succ, node, new);
            }
         }
      }
   }

   return true;
}

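/* Fold a neg node into its neighbors instead of emitting a real instruction:
 * 1. If the child is an ALU op that supports destination negate and the neg
 *    is its only successor, flip the child's dest_negate flag.
 * 2. Otherwise, for every ALU successor whose matching input supports source
 *    negate, flip that children_negate flag and read the child directly,
 *    e.g. mul(x, neg(y)) becomes mul(x, -y).
 * The neg node is deleted once it has no successor left.
 */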
static bool gpir_lower_neg(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *neg = gpir_node_to_alu(node);
   gpir_node *child = neg->children[0];

   /* check if the child can negate its destination */
   if (child->type == gpir_node_type_alu) {
      /* the negate must be the child's only successor */
      if (list_is_singular(&child->succ_list) &&
          gpir_op_infos[child->op].dest_neg) {
         gpir_alu_node *alu = gpir_node_to_alu(child);
         alu->dest_negate = !alu->dest_negate;

         gpir_node_replace_succ(child, node);
         gpir_node_delete(node);
         return true;
      }
   }

   /* check if the successors can negate this source */
   gpir_node_foreach_succ_safe(node, dep) {
      gpir_node *succ = dep->succ;
      if (succ->type != gpir_node_type_alu)
         continue;

      bool success = true;
      gpir_alu_node *alu = gpir_node_to_alu(dep->succ);
      for (int i = 0; i < alu->num_child; i++) {
         if (alu->children[i] == node) {
            if (gpir_op_infos[succ->op].src_neg[i]) {
               alu->children_negate[i] = !alu->children_negate[i];
               alu->children[i] = child;
            }
            else
               success = false;
         }
      }

      if (success)
         gpir_node_replace_pred(dep, child);
   }

   if (gpir_node_is_root(node))
      gpir_node_delete(node);

   return true;
}

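/* Expand rcp/rsqrt/exp2/log2 into the complex-unit sequence
 *
 *    op(x) -> complex1(op_impl(x), complex2(x), x)
 *
 * where op_impl appears to produce a rough approximation that the
 * complex1/complex2 pair then refines. exp2 additionally pre-scales its
 * input with preexp2, and log2 post-processes its result with postlog2.
 */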
static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *alu = gpir_node_to_alu(node);
   gpir_node *child = alu->children[0];

   if (node->op == gpir_op_exp2) {
      gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2);
      if (unlikely(!preexp2))
         return false;

      preexp2->children[0] = child;
      preexp2->num_child = 1;
      gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT);
      list_addtail(&preexp2->node.list, &node->list);

      child = &preexp2->node;
   }

   gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
   if (unlikely(!complex2))
      return false;

   complex2->children[0] = child;
   complex2->num_child = 1;
   gpir_node_add_dep(&complex2->node, child, GPIR_DEP_INPUT);
   list_addtail(&complex2->node.list, &node->list);

   int impl_op = 0;
   switch (node->op) {
   case gpir_op_rcp:
      impl_op = gpir_op_rcp_impl;
      break;
   case gpir_op_rsqrt:
      impl_op = gpir_op_rsqrt_impl;
      break;
   case gpir_op_exp2:
      impl_op = gpir_op_exp2_impl;
      break;
   case gpir_op_log2:
      impl_op = gpir_op_log2_impl;
      break;
   default:
      assert(0);
   }

   gpir_alu_node *impl = gpir_node_create(block, impl_op);
   if (unlikely(!impl))
      return false;

   impl->children[0] = child;
   impl->num_child = 1;
   gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
   list_addtail(&impl->node.list, &node->list);

   gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1);
   if (unlikely(!complex1))
      return false;

   complex1->children[0] = &impl->node;
   complex1->children[1] = &complex2->node;
   complex1->children[2] = child;
   complex1->num_child = 3;
   gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT);
   gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT);
   gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT);
   list_addtail(&complex1->node.list, &node->list);

   gpir_node *result = &complex1->node;

   if (node->op == gpir_op_log2) {
      gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2);
      if (unlikely(!postlog2))
         return false;

      postlog2->children[0] = result;
      postlog2->num_child = 1;
      gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT);
      list_addtail(&postlog2->node.list, &node->list);

      result = &postlog2->node;
   }

   gpir_node_replace_succ(result, node);
   gpir_node_delete(node);

   return true;
}

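/* Wrap every node that may occupy two instruction slots in a pair of dummy
 * nodes, so that value register allocation can handle it with a single
 * register; see the comment in the loop below for the dummy_f/m details.
 */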
static bool gpir_lower_node_may_consume_two_slots(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (gpir_op_infos[node->op].may_consume_two_slots) {
            /* dummy_f/m are auxiliary nodes for value register allocation:
             * 1. Before register allocation, create the fake nodes dummy_f
             *    and dummy_m so that the tree becomes
             *    (dummy_m (node dummy_f)). dummy_m can be spilled, but the
             *    other nodes in the tree cannot.
             * 2. After register allocation and fake-dep insertion, merge
             *    all deps of dummy_m and dummy_f into node and remove
             *    dummy_m and dummy_f.
             *
             * Alternatively we could skip dummy_f/m and allocate two value
             * registers for the node. But then we would have to guarantee
             * two free slots after the node's successors, even though one
             * slot is enough to schedule it (a move can stand in for the
             * two-slot node). Handling spilling is also harder with the
             * two-register approach.
             *
             * With the dummy_f/m method there is no such requirement: the
             * node is scheduled when two slots are available, and a move is
             * used otherwise. The node can also be spilled with a single
             * register.
             */
            gpir_node *dummy_m = gpir_node_create(block, gpir_op_dummy_m);
            if (unlikely(!dummy_m))
               return false;
            list_add(&dummy_m->list, &node->list);

            gpir_node *dummy_f = gpir_node_create(block, gpir_op_dummy_f);
            if (unlikely(!dummy_f))
               return false;
            list_add(&dummy_f->list, &node->list);

            gpir_alu_node *alu = gpir_node_to_alu(dummy_m);
            alu->children[0] = node;
            alu->children[1] = dummy_f;
            alu->num_child = 2;

            gpir_node_replace_succ(dummy_m, node);
            gpir_node_add_dep(dummy_m, node, GPIR_DEP_INPUT);
            gpir_node_add_dep(dummy_m, dummy_f, GPIR_DEP_INPUT);
         }
      }
   }

   return true;
}

/*
 * There are no 'equal' or 'not-equal' opcodes.
 * eq (a == b) is lowered to and(a >= b, b >= a)
 * ne (a != b) is lowered to or(a < b, b < a)
 * Comparison results are the booleans 0.0 and 1.0, so 'and' and 'or' can in
 * turn be implemented as min and max.
 */
static bool gpir_lower_eq_ne(gpir_block *block, gpir_node *node)
{
   gpir_op cmp_node_op;
   gpir_op node_new_op;
   switch (node->op) {
      case gpir_op_eq:
         cmp_node_op = gpir_op_ge;
         node_new_op = gpir_op_min; /* and */
         break;
      case gpir_op_ne:
         cmp_node_op = gpir_op_lt;
         node_new_op = gpir_op_max; /* or */
         break;
      default:
         unreachable("bad node op");
   }

   gpir_alu_node *e = gpir_node_to_alu(node);

   gpir_alu_node *cmp1 = gpir_node_create(block, cmp_node_op);
   if (unlikely(!cmp1))
      return false;
   list_addtail(&cmp1->node.list, &node->list);
   gpir_alu_node *cmp2 = gpir_node_create(block, cmp_node_op);
   if (unlikely(!cmp2))
      return false;
   list_addtail(&cmp2->node.list, &node->list);

   cmp1->children[0] = e->children[0];
   cmp1->children[1] = e->children[1];
   cmp1->num_child = 2;

   cmp2->children[0] = e->children[1];
   cmp2->children[1] = e->children[0];
   cmp2->num_child = 2;

   gpir_node_add_dep(&cmp1->node, e->children[0], GPIR_DEP_INPUT);
   gpir_node_add_dep(&cmp1->node, e->children[1], GPIR_DEP_INPUT);

   gpir_node_add_dep(&cmp2->node, e->children[0], GPIR_DEP_INPUT);
   gpir_node_add_dep(&cmp2->node, e->children[1], GPIR_DEP_INPUT);

   gpir_node_foreach_pred_safe(node, dep) {
      gpir_node_remove_dep(node, dep->pred);
   }

   gpir_node_add_dep(node, &cmp1->node, GPIR_DEP_INPUT);
   gpir_node_add_dep(node, &cmp2->node, GPIR_DEP_INPUT);

   node->op = node_new_op;
   e->children[0] = &cmp1->node;
   e->children[1] = &cmp2->node;
   e->num_child = 2;

   return true;
}

/*
 * There is no 'abs' opcode.
 * abs(a) is lowered to max(a, -a)
 */
static bool gpir_lower_abs(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *alu = gpir_node_to_alu(node);

   assert(node->op == gpir_op_abs);

   node->op = gpir_op_max;

   /* the second operand is the first one with its negate flag flipped */
   alu->children[1] = alu->children[0];
   alu->children_negate[1] = !alu->children_negate[0];
   alu->num_child = 2;

   return true;
}

/*
 * There is no 'not' opcode.
 * not(a) is lowered to add(1, -a), which maps the boolean 0.0 to 1.0 and
 * 1.0 to 0.0.
 */
static bool gpir_lower_not(gpir_block *block, gpir_node *node)
{
   gpir_alu_node *alu = gpir_node_to_alu(node);

   assert(alu->node.op == gpir_op_not);

   node->op = gpir_op_add;

   gpir_node *node_const = gpir_node_create(block, gpir_op_const);
   if (unlikely(!node_const))
      return false;

   gpir_const_node *c = gpir_node_to_const(node_const);

   assert(c->node.op == gpir_op_const);

   list_addtail(&c->node.list, &node->list);
   c->value.f = 1.0f;
   gpir_node_add_dep(&alu->node, &c->node, GPIR_DEP_INPUT);

   alu->children_negate[1] = !alu->children_negate[0];
   alu->children[1] = alu->children[0];
   alu->children[0] = &c->node;
   alu->num_child = 2;

   return true;
}

/* There is no unconditional branch instruction, so we have to lower it to a
 * conditional branch with a condition of 1.0.
 */
static bool gpir_lower_branch_uncond(gpir_block *block, gpir_node *node)
{
   gpir_branch_node *branch = gpir_node_to_branch(node);

   gpir_node *node_const = gpir_node_create(block, gpir_op_const);
   if (unlikely(!node_const))
      return false;

   gpir_const_node *c = gpir_node_to_const(node_const);

   list_addtail(&c->node.list, &node->list);
   c->value.f = 1.0f;
   gpir_node_add_dep(&branch->node, &c->node, GPIR_DEP_INPUT);

   branch->node.op = gpir_op_branch_cond;
   branch->cond = node_const;

   return true;
}

static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = {
   [gpir_op_not] = gpir_lower_not,
   [gpir_op_neg] = gpir_lower_neg,
   [gpir_op_rcp] = gpir_lower_complex,
   [gpir_op_rsqrt] = gpir_lower_complex,
   [gpir_op_exp2] = gpir_lower_complex,
   [gpir_op_log2] = gpir_lower_complex,
   [gpir_op_eq] = gpir_lower_eq_ne,
   [gpir_op_ne] = gpir_lower_eq_ne,
   [gpir_op_abs] = gpir_lower_abs,
   [gpir_op_branch_uncond] = gpir_lower_branch_uncond,
};

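/* Run everything that must be lowered before rsched: first the per-op
 * lowerings from the table above, then the whole-program passes for
 * constants, loads and two-slot nodes.
 */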
bool gpir_pre_rsched_lower_prog(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (gpir_pre_rsched_lower_funcs[node->op] &&
             !gpir_pre_rsched_lower_funcs[node->op](block, node))
            return false;
      }
   }

   if (!gpir_lower_const(comp))
      return false;

   if (!gpir_lower_load(comp))
      return false;

   if (!gpir_lower_node_may_consume_two_slots(comp))
      return false;

   gpir_debug("pre rsched lower prog\n");
   gpir_node_print_prog_seq(comp);
   return true;
}