1 /*
2  * Copyright © 2021 Raspberry Pi
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file v3d_opt_constant_alu.c
26  *
 * Identifies sequences of ALU instructions that operate on constant operands
 * and reduces them to a uniform load.
29  *
30  * This is useful, for example, to optimize the result of removing leading
31  * ldunifa instructions in the DCE pass, which can leave a series of constant
32  * additions that increment the unifa address by 4 for each leading ldunif
33  * removed. It helps turn this:
34  *
35  * nop t1; ldunif (0x00000004 / 0.000000)
36  * nop t2; ldunif (0x00000004 / 0.000000)
37  * add t3, t1, t2
38  *
39  * into:
40  *
41  * nop t1; ldunif (0x00000004 / 0.000000)
42  * nop t2; ldunif (0x00000004 / 0.000000)
43  * nop t4; ldunif (0x00000008 / 0.000000)
44  * mov t3, t4
45  *
46  * For best results we want to run copy propagation in between this and
47  * the combine constants pass: every time we manage to convert an alu to
48  * a uniform load, we move the uniform to the original alu destination. By
49  * running copy propagation immediately after we can reuse the uniform as
50  * source in more follow-up alu instructions, making them constant and allowing
51  * this pass to continue making progress. However, if we run the small
52  * immediates optimization before that, that pass can convert some of the movs
53  * to use small immediates instead of the uniforms and prevent us from making
54  * the best of this pass, as small immediates don't get copy propagated.
55  */
56 
57 #include "v3d_compiler.h"
58 
59 #include "util/half_float.h"
60 #include "util/u_math.h"
61 
62 static bool
opt_constant_add(struct v3d_compile * c,struct qinst * inst,union fi * values)63 opt_constant_add(struct v3d_compile *c, struct qinst *inst, union fi *values)
64 {
65         /* FIXME: handle more add operations */
66         struct qreg unif = { };
67         switch (inst->qpu.alu.add.op) {
68         case V3D_QPU_A_ADD:
69                 c->cursor = vir_after_inst(inst);
70                 unif = vir_uniform_ui(c, values[0].ui + values[1].ui);
71                 break;
72 
73         case V3D_QPU_A_VFPACK: {
74                 assert(inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE);
75 
76                 const uint32_t packed =
77                         (((uint32_t)_mesa_float_to_half(values[1].f)) << 16) |
78                         _mesa_float_to_half(values[0].f);
79 
80                 c->cursor = vir_after_inst(inst);
81                 unif = vir_uniform_ui(c, packed);
82                 break;
83         }
84 
85         default:
86                 return false;
87         }
88 
89         /* Remove the original ALU instruction and replace it with a uniform
90          * load. If the original instruction loaded an implicit uniform we
91          * need to replicate that in the new instruction.
92          */
93         struct qreg dst = inst->dst;
94         struct qinst *mov = vir_MOV_dest(c, dst, unif);
95         mov->uniform = inst->uniform;
96         vir_remove_instruction(c, inst);
97         if (dst.file == QFILE_TEMP)
98                 c->defs[dst.index] = mov;
99         return true;
100 }
101 
102 static bool
try_opt_constant_alu(struct v3d_compile * c,struct qinst * inst)103 try_opt_constant_alu(struct v3d_compile *c, struct qinst *inst)
104 {
105         if(inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
106                 return false;
107 
108         /* If the instruction does anything other than writing the result
109          * directly to the destination, skip.
110          */
111         if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
112             inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
113                 return false;
114         }
115 
116         if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
117             inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
118                 return false;
119         }
120 
121         assert(vir_get_nsrc(inst) <= 2);
122         union fi values[2];
123         for (int i = 0; i < vir_get_nsrc(inst); i++) {
124                 if (inst->src[i].file == QFILE_SMALL_IMM &&
125                     v3d_qpu_small_imm_unpack(c->devinfo,
126                                              inst->qpu.raddr_b,
127                                              &values[i].ui)) {
128                         continue;
129                 }
130 
131                 if (inst->src[i].file == QFILE_TEMP) {
132                         struct qinst *def = c->defs[inst->src[i].index];
133                         if (!def)
134                                 return false;
135 
136                         if ((def->qpu.sig.ldunif || def->qpu.sig.ldunifrf) &&
137                             c->uniform_contents[def->uniform] == QUNIFORM_CONSTANT) {
138                                 values[i].ui = c->uniform_data[def->uniform];
139                                 continue;
140                         }
141                 }
142 
143                 return false;
144         }
145 
146         /* FIXME: handle mul operations */
147         if (vir_is_add(inst))
148                 return opt_constant_add(c, inst, values);
149 
150         return false;
151 }
152 
153 bool
vir_opt_constant_alu(struct v3d_compile * c)154 vir_opt_constant_alu(struct v3d_compile *c)
155 {
156         bool progress = false;
157         vir_for_each_block(block, c) {
158                 vir_for_each_inst_safe(inst, block) {
159                         progress = try_opt_constant_alu(c, inst) || progress;
160                 }
161         }
162 
163         return progress;
164 }
165