1 /*
2  * Copyright (c) 2017 Lima Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include "util/bitscan.h"
26 #include "util/ralloc.h"
27 
28 #include "ppir.h"
29 
/* Lower a const node so that its value is consumed through the const0
 * pipeline register.
 *
 * - A root const is deleted.
 * - If the single successor is an ALU or branch node, the const's dest and
 *   every src of that successor that references the const are retargeted to
 *   the pipeline register in place.
 * - For any other successor type a mov is inserted with
 *   ppir_node_insert_mov() and the const feeds that mov through the pipeline
 *   register instead.
 *
 * Returns false only when the mov cannot be created.
 */
static bool ppir_lower_const(ppir_block *block, ppir_node *node)
{
   if (ppir_node_is_root(node)) {
      ppir_node_delete(node);
      return true;
   }

   assert(ppir_node_has_single_succ(node));

   ppir_node *user = ppir_node_first_succ(node);
   ppir_dest *const_dest = ppir_node_get_dest(node);

   bool consumes_directly = user->type == ppir_node_type_alu ||
                            user->type == ppir_node_type_branch;
   if (consumes_directly) {
      /* const0 is only a placeholder here: the register will be updated
       * in node_to_instr later */
      const_dest->type = ppir_target_pipeline;
      const_dest->pipeline = ppir_pipeline_reg_const0;

      /* A single successor can still reference this const from several
       * of its sources, so patch every matching one */
      for (int idx = 0; idx < ppir_node_get_src_num(user); idx++) {
         ppir_src *ref = ppir_node_get_src(user, idx);
         if (!ref || ref->node != node)
            continue;

         ref->type = ppir_target_pipeline;
         ref->pipeline = ppir_pipeline_reg_const0;
      }
      return true;
   }

   /* Every other kind of successor reads the const through an extra mov */
   ppir_node *mov = ppir_node_insert_mov(node);
   if (unlikely(!mov))
      return false;

   ppir_debug("lower const create move %d for %d\n",
              mov->index, node->index);

   /* The src/dest types must only be changed *after* the successors had
    * their children replaced (i.e. after the mov insertion above),
    * otherwise ppir_node_replace_child() won't find the matching src/dst
    * and as a result won't work */
   ppir_src *mov_in = ppir_node_get_src(mov, 0);

   const_dest->type = ppir_target_pipeline;
   mov_in->type = ppir_target_pipeline;

   const_dest->pipeline = ppir_pipeline_reg_const0;
   mov_in->pipeline = ppir_pipeline_reg_const0;

   return true;
}
82 
/* Replace a two-source ALU op by its operand-swapped counterpart.
 *
 * The op is advanced to the next op value (the swapped op must be the next
 * op) and the two sources are exchanged.  Always returns true.
 */
static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)
{
   /* The swapped variant must directly follow the original op */
   ++node->op;

   assert(node->type == ppir_node_type_alu);
   ppir_alu_node *alu = ppir_node_to_alu(node);
   assert(alu->num_src == 2);

   ppir_src *lhs = &alu->src[0];
   ppir_src *rhs = &alu->src[1];

   ppir_src saved = *lhs;
   *lhs = *rhs;
   *rhs = saved;

   return true;
}
97 
/* Lower a load node so that its result is delivered through the uniform
 * pipeline register.
 *
 * - A root load with an SSA dest is deleted.
 * - If the load has a single src successor (or is a root) and does not
 *   write a register, and its first successor is an ALU or branch node,
 *   every src of that successor referencing the load is switched to the
 *   pipeline register together with the load's dest.
 * - Otherwise a mov is inserted with ppir_node_insert_mov() and the load
 *   feeds the mov through the pipeline register.
 *
 * Returns false only when the mov cannot be created.
 */
static bool ppir_lower_load(ppir_block *block, ppir_node *node)
{
   ppir_dest *load_dest = ppir_node_get_dest(node);

   if (ppir_node_is_root(node) && load_dest->type == ppir_target_ssa) {
      ppir_node_delete(node);
      return true;
   }

   /* A load can have multiple successors if a load node that has a load
    * node as its source was duplicated, hence the explicit check */
   bool may_feed_directly =
      (ppir_node_has_single_src_succ(node) || ppir_node_is_root(node)) &&
      load_dest->type != ppir_target_register;

   if (may_feed_directly) {
      ppir_node *user = ppir_node_first_succ(node);

      if (user->type == ppir_node_type_alu ||
          user->type == ppir_node_type_branch) {
         /* A single successor can still reference this node from several
          * of its sources */
         for (int idx = 0; idx < ppir_node_get_src_num(user); idx++) {
            ppir_src *ref = ppir_node_get_src(user, idx);
            if (!ref || ref->node != node)
               continue;

            /* ALU and branch can consume uniforms directly */
            load_dest->type = ppir_target_pipeline;
            ref->type = ppir_target_pipeline;
            load_dest->pipeline = ppir_pipeline_reg_uniform;
            ref->pipeline = ppir_pipeline_reg_uniform;
         }
         return true;
      }
      /* Any other successor type falls through to the mov below */
   }

   ppir_node *mov = ppir_node_insert_mov(node);
   if (unlikely(!mov))
      return false;

   ppir_src *mov_in = ppir_node_get_src(mov, 0);

   load_dest->type = ppir_target_pipeline;
   mov_in->type = ppir_target_pipeline;

   load_dest->pipeline = ppir_pipeline_reg_uniform;
   mov_in->pipeline = ppir_pipeline_reg_uniform;

   return true;
}
142 
/* Turn a one-source ddx/ddy ALU node into a two-source node.
 *
 * src[0] is duplicated into src[1]; for ddx the copy in src[1] gets its
 * negate flag toggled, for ddy the flag of src[0] is toggled instead.
 * Any other op trips an assertion.  Always returns true.
 */
static bool ppir_lower_ddxy(ppir_block *block, ppir_node *node)
{
   assert(node->type == ppir_node_type_alu);
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_src *first = &alu->src[0];
   ppir_src *second = &alu->src[1];

   *second = *first;

   switch (node->op) {
   case ppir_op_ddx:
      second->negate = !second->negate;
      break;
   case ppir_op_ddy:
      first->negate = !first->negate;
      break;
   default:
      assert(0);
      break;
   }

   alu->num_src = 2;

   return true;
}
160 
/* Lower a texture load so that its result is delivered through the sampler
 * pipeline register.
 *
 * With a single successor and an SSA dest, the dest and all srcs of the
 * successor that reference this node are retargeted in place.  Otherwise a
 * mov is inserted with ppir_node_insert_mov() and the texture node feeds
 * that mov through the pipeline register.
 *
 * Returns false only when the mov cannot be created.
 */
static bool ppir_lower_texture(ppir_block *block, ppir_node *node)
{
   ppir_dest *tex_dest = ppir_node_get_dest(node);

   bool feeds_single_user = ppir_node_has_single_succ(node) &&
                            tex_dest->type == ppir_target_ssa;

   if (feeds_single_user) {
      ppir_node *user = ppir_node_first_succ(node);

      tex_dest->type = ppir_target_pipeline;
      tex_dest->pipeline = ppir_pipeline_reg_sampler;

      /* Patch every source of the successor that points at this node */
      for (int idx = 0; idx < ppir_node_get_src_num(user); idx++) {
         ppir_src *ref = ppir_node_get_src(user, idx);
         if (!ref || ref->node != node)
            continue;

         ref->type = ppir_target_pipeline;
         ref->pipeline = ppir_pipeline_reg_sampler;
      }
      return true;
   }

   /* Fallback: route the result through a mov */
   ppir_node *mov = ppir_node_insert_mov(node);
   if (unlikely(!mov))
      return false;

   ppir_debug("lower texture create move %d for %d\n",
              mov->index, node->index);

   ppir_src *mov_in = ppir_node_get_src(mov, 0);

   tex_dest->type = ppir_target_pipeline;
   mov_in->type = ppir_target_pipeline;

   tex_dest->pipeline = ppir_pipeline_reg_sampler;
   mov_in->pipeline = ppir_pipeline_reg_sampler;

   return true;
}
194 
195 /* Check if the select condition and ensure it can be inserted to
196  * the scalar mul slot */
ppir_lower_select(ppir_block * block,ppir_node * node)197 static bool ppir_lower_select(ppir_block *block, ppir_node *node)
198 {
199    ppir_alu_node *alu = ppir_node_to_alu(node);
200    ppir_src *src0 = &alu->src[0];
201    ppir_src *src1 = &alu->src[1];
202    ppir_src *src2 = &alu->src[2];
203 
204    /* If the condition is already an alu scalar whose only successor
205     * is the select node, just turn it into pipeline output. */
206    /* The (src2->node == cond) case is a tricky exception.
207     * The reason is that we must force cond to output to ^fmul -- but
208     * then it no longer writes to a register and it is impossible to
209     * reference ^fmul in src2. So in that exceptional case, also fall
210     * back to the mov. */
211    ppir_node *cond = src0->node;
212    if (cond &&
213        cond->type == ppir_node_type_alu &&
214        ppir_node_has_single_succ(cond) &&
215        ppir_target_is_scalar(ppir_node_get_dest(cond)) &&
216        ppir_node_schedulable_slot(cond, PPIR_INSTR_SLOT_ALU_SCL_MUL) &&
217        src2->node != cond) {
218 
219       ppir_dest *cond_dest = ppir_node_get_dest(cond);
220       cond_dest->type = ppir_target_pipeline;
221       cond_dest->pipeline = ppir_pipeline_reg_fmul;
222 
223       ppir_node_target_assign(src0, cond);
224 
225       /* src1 could also be a reference from the same node as
226        * the condition, so update it in that case. */
227       if (src1->node && src1->node == cond)
228          ppir_node_target_assign(src1, cond);
229 
230       return true;
231    }
232 
233    /* If the condition can't be used for any reason, insert a mov
234     * so that the condition can end up in ^fmul */
235    ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);
236    if (!move)
237       return false;
238    list_addtail(&move->list, &node->list);
239 
240    ppir_alu_node *move_alu = ppir_node_to_alu(move);
241    ppir_src *move_src = move_alu->src;
242    move_src->type = src0->type;
243    move_src->ssa = src0->ssa;
244    move_src->swizzle[0] = src0->swizzle[0];
245    move_alu->num_src = 1;
246 
247    ppir_dest *move_dest = &move_alu->dest;
248    move_dest->type = ppir_target_pipeline;
249    move_dest->pipeline = ppir_pipeline_reg_fmul;
250    move_dest->write_mask = 1;
251 
252    ppir_node *pred = src0->node;
253    ppir_dep *dep = ppir_dep_for_pred(node, pred);
254    if (dep)
255       ppir_node_replace_pred(dep, move);
256    else
257       ppir_node_add_dep(node, move, ppir_dep_src);
258 
259    /* pred can be a register */
260    if (pred)
261       ppir_node_add_dep(move, pred, ppir_dep_src);
262 
263    ppir_node_target_assign(src0, move);
264 
265    /* src1 could also be a reference from the same node as
266     * the condition, so update it in that case. */
267    if (src1->node && src1->node == pred)
268       ppir_node_target_assign(src1, move);
269 
270    return true;
271 }
272 
/* Lower trunc to a mov whose dest carries the round output modifier.
 * Always returns true.
 */
static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)
{
   ppir_alu_node *alu = ppir_node_to_alu(node);

   /* The rounding is done by the output modifier, the op itself only moves */
   alu->dest.modifier = ppir_outmod_round;
   node->op = ppir_op_mov;

   return true;
}
283 
/* Lower abs to a mov whose single source has the absolute modifier set and
 * the negate modifier cleared.  Always returns true.
 */
static bool ppir_lower_abs(ppir_block *block, ppir_node *node)
{
   ppir_alu_node *alu = ppir_node_to_alu(node);

   assert(alu->num_src == 1);

   ppir_src *operand = &alu->src[0];
   operand->absolute = true;
   operand->negate = false;

   node->op = ppir_op_mov;

   return true;
}
297 
/* Lower neg to a mov by toggling the negate modifier of its single source.
 * Always returns true.
 */
static bool ppir_lower_neg(ppir_block *block, ppir_node *node)
{
   ppir_alu_node *alu = ppir_node_to_alu(node);

   assert(alu->num_src == 1);

   /* Toggle rather than set: the source may already be negated */
   ppir_src *operand = &alu->src[0];
   operand->negate = !operand->negate;

   node->op = ppir_op_mov;

   return true;
}
310 
/* Lower sat to a mov whose dest carries the clamp_fraction output modifier.
 * Always returns true.
 */
static bool ppir_lower_sat(ppir_block *block, ppir_node *node)
{
   ppir_alu_node *alu = ppir_node_to_alu(node);

   assert(alu->num_src == 1);

   /* Saturation is done by the output modifier, the op itself only moves */
   alu->dest.modifier = ppir_outmod_clamp_fraction;
   node->op = ppir_op_mov;

   return true;
}
324 
/* Lower a conditional branch into a comparison of its condition with 0.
 *
 * Branches without sources (unconditional) are left untouched.  Otherwise a
 * scalar const node holding 0 is created, assigned as the branch's second
 * source, added as a dependency and linked into the node list in front of
 * the branch.  The condition flags are set to "equal" when branch->negate
 * is set and to "greater or less" otherwise.
 *
 * Returns false only when the const node cannot be created.
 */
static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
{
   ppir_branch_node *branch = ppir_node_to_branch(node);

   /* Nothing to compare for an unconditional branch */
   if (branch->num_src == 0)
      return true;

   ppir_const_node *zero_const = ppir_node_create(block, ppir_op_const, -1, 0);
   if (!zero_const)
      return false;

   ppir_node *zero_node = &zero_const->node;
   ppir_dest *zero_dest = &zero_const->dest;

   /* Single-component 0.0 delivered through the const0 pipeline register */
   zero_const->constant.value[0].f = 0;
   zero_const->constant.num = 1;
   zero_dest->type = ppir_target_pipeline;
   zero_dest->pipeline = ppir_pipeline_reg_const0;
   zero_dest->ssa.num_components = 1;
   zero_dest->write_mask = 0x01;

   /* For now the branch condition is simply compared with 0.  In the future
    * it should be checked whether the comparison node can be moved into the
    * branch itself, keeping the current approach as a fallback for complex
    * conditions. */
   ppir_node_target_assign(&branch->src[1], zero_node);

   if (branch->negate) {
      branch->cond_eq = true;
   } else {
      branch->cond_gt = true;
      branch->cond_lt = true;
   }

   branch->num_src = 2;

   ppir_node_add_dep(&branch->node, zero_node, ppir_dep_src);
   list_addtail(&zero_node->list, &node->list);

   return true;
}
366 
ppir_lower_accum(ppir_block * block,ppir_node * node)367 static bool ppir_lower_accum(ppir_block *block, ppir_node *node)
368 {
369     /* If the last argument of a node placed in PPIR_INSTR_SLOT_ALU_SCL_ADD
370     * (or PPIR_INSTR_SLOT_ALU_VEC_ADD) is placed in
371     * PPIR_INSTR_SLOT_ALU_SCL_MUL (or PPIR_INSTR_SLOT_ALU_VEC_MUL) we cannot
372     * save a register (and an instruction) by using a pipeline register.
373     * Therefore it is interesting to make sure arguments of that type are
374     * the first argument by swapping arguments (if possible) */
375    ppir_alu_node *alu = ppir_node_to_alu(node);
376 
377    assert(alu->num_src >= 2);
378 
379    if (alu->src[0].type == ppir_target_pipeline)
380       return true;
381 
382    if (alu->src[0].type == ppir_target_ssa) {
383       int *src_0_slots = ppir_op_infos[alu->src[0].node->op].slots;
384       if (src_0_slots) {
385          for (int i = 0; src_0_slots[i] != PPIR_INSTR_SLOT_END; i++) {
386             if ((src_0_slots[i] == PPIR_INSTR_SLOT_ALU_SCL_MUL) ||
387                (src_0_slots[i] == PPIR_INSTR_SLOT_ALU_VEC_MUL)) {
388                return true;
389             }
390          }
391       }
392    }
393 
394    int src_to_swap = -1;
395    for (int j = 1; j < alu->num_src; j++) {
396       if (alu->src[j].type != ppir_target_ssa)
397          continue;
398       int *src_slots = ppir_op_infos[alu->src[j].node->op].slots;
399       if (!src_slots)
400          continue;
401       for (int i = 0; src_slots[i] != PPIR_INSTR_SLOT_END; i++) {
402          if ((src_slots[i] == PPIR_INSTR_SLOT_ALU_SCL_MUL) ||
403              (src_slots[i] == PPIR_INSTR_SLOT_ALU_VEC_MUL)) {
404             src_to_swap = j;
405             break;
406          }
407       }
408       if (src_to_swap > 0)
409          break;
410    }
411 
412    if (src_to_swap < 0)
413       return true;
414 
415    /* Swap arguments so that we can use a pipeline register later on */
416    ppir_src tmp = alu->src[0];
417    alu->src[0] = alu->src[src_to_swap];
418    alu->src[src_to_swap] = tmp;
419 
420    return true;
421 }
422 
423 static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
424    [ppir_op_abs] = ppir_lower_abs,
425    [ppir_op_neg] = ppir_lower_neg,
426    [ppir_op_const] = ppir_lower_const,
427    [ppir_op_ddx] = ppir_lower_ddxy,
428    [ppir_op_ddy] = ppir_lower_ddxy,
429    [ppir_op_lt] = ppir_lower_swap_args,
430    [ppir_op_le] = ppir_lower_swap_args,
431    [ppir_op_load_texture] = ppir_lower_texture,
432    [ppir_op_select] = ppir_lower_select,
433    [ppir_op_trunc] = ppir_lower_trunc,
434    [ppir_op_sat] = ppir_lower_sat,
435    [ppir_op_branch] = ppir_lower_branch,
436    [ppir_op_load_uniform] = ppir_lower_load,
437    [ppir_op_load_temp] = ppir_lower_load,
438    [ppir_op_add] = ppir_lower_accum,
439    [ppir_op_max] = ppir_lower_accum,
440    [ppir_op_min] = ppir_lower_accum,
441    [ppir_op_eq] = ppir_lower_accum,
442    [ppir_op_ne] = ppir_lower_accum,
443 };
444 
/* Run the lowering callback registered in ppir_lower_funcs for every node
 * of every block of the program.
 *
 * Ops without a registered callback are skipped.  Returns false as soon as
 * one callback fails, true once all nodes have been processed.
 */
bool ppir_lower_prog(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      /* The _safe iterator is used here; callbacks such as
       * ppir_lower_const() may delete the node being visited */
      list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
         bool (*lower)(ppir_block *, ppir_node *) = ppir_lower_funcs[node->op];

         if (!lower)
            continue;

         if (!lower(block, node))
            return false;
      }
   }

   return true;
}
457