1 /*
2  * Copyright (C) 2019 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
/* Midgard has some accelerated support for perspective projection on the
 * load/store pipes. So the first perspective projection pass looks for
 * lowered/open-coded perspective projection of the form "fmul (A.xyz,
 * frcp(A.w))" or "fmul (A.xy, frcp(A.z))" and rewrites it with a native
 * perspective division opcode (on the load/store pipe). Caveats apply: the
 * frcp should be used only once to make this optimization worthwhile. And the
 * source of the frcp ought to be a varying to make it worthwhile...
 *
 * The second pass in this file is a step #2 of sorts: fusing that load/store
 * projection into a varying load instruction (they can be done together
 * implicitly). This depends on the combination pass. Again, a caveat: the
 * varying should only be used once to make this worthwhile.
 */
37 
38 #include "compiler.h"
39 
40 static bool
is_swizzle_0(unsigned * swizzle)41 is_swizzle_0(unsigned *swizzle)
42 {
43         for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c)
44                 if (swizzle[c])
45                         return false;
46 
47         return true;
48 }
49 
50 bool
midgard_opt_combine_projection(compiler_context * ctx,midgard_block * block)51 midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block)
52 {
53         bool progress = false;
54 
55         mir_foreach_instr_in_block_safe(block, ins) {
56                 /* First search for fmul */
57                 if (ins->type != TAG_ALU_4) continue;
58                 if (ins->op != midgard_alu_op_fmul) continue;
59 
60                 /* TODO: Flip */
61 
62                 /* Check the swizzles */
63 
64                 if (!mir_is_simple_swizzle(ins->swizzle[0], ins->mask)) continue;
65                 if (!is_swizzle_0(ins->swizzle[1])) continue;
66 
67                 /* Awesome, we're the right form. Now check where src2 is from */
68                 unsigned frcp = ins->src[1];
69                 unsigned to = ins->dest;
70 
71                 if (frcp & PAN_IS_REG) continue;
72                 if (to & PAN_IS_REG) continue;
73 
74                 bool frcp_found = false;
75                 unsigned frcp_component = 0;
76                 unsigned frcp_from = 0;
77 
78                 mir_foreach_instr_in_block_safe(block, sub) {
79                         if (sub->dest != frcp) continue;
80 
81                         frcp_component = sub->swizzle[0][0];
82                         frcp_from = sub->src[0];
83 
84                         frcp_found =
85                                 (sub->type == TAG_ALU_4) &&
86                                 (sub->op == midgard_alu_op_frcp);
87                         break;
88                 }
89 
90                 if (!frcp_found) continue;
91                 if (frcp_from != ins->src[0]) continue;
92                 if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z) continue;
93                 if (!mir_single_use(ctx, frcp)) continue;
94 
95                 /* Heuristic: check if the frcp is from a single-use varying */
96 
97                 bool ok = false;
98 
99                 /* One for frcp and one for fmul */
100                 if (mir_use_count(ctx, frcp_from) > 2) continue;
101 
102                 mir_foreach_instr_in_block_safe(block, v) {
103                         if (v->dest != frcp_from) continue;
104                         if (v->type != TAG_LOAD_STORE_4) break;
105                         if (!OP_IS_LOAD_VARY_F(v->op)) break;
106 
107                         ok = true;
108                         break;
109                 }
110 
111                 if (!ok)
112                         continue;
113 
114                 /* Nice, we got the form spot on. Let's convert! */
115 
116                 midgard_instruction accel = {
117                         .type = TAG_LOAD_STORE_4,
118                         .mask = ins->mask,
119                         .dest = to,
120                         .dest_type = nir_type_float32,
121                         .src = { frcp_from, ~0, ~0, ~0 },
122                         .src_types = { nir_type_float32 },
123                         .swizzle = SWIZZLE_IDENTITY_4,
124                         .op = frcp_component == COMPONENT_W ?
125                                 midgard_op_ldst_perspective_div_w :
126                                 midgard_op_ldst_perspective_div_z,
127                         .load_store = {
128                                 .bitsize_toggle = true,
129                         }
130                 };
131 
132                 mir_insert_instruction_before(ctx, ins, accel);
133                 mir_remove_instruction(ins);
134 
135                 progress |= true;
136         }
137 
138         return progress;
139 }
140 
141 bool
midgard_opt_varying_projection(compiler_context * ctx,midgard_block * block)142 midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
143 {
144         bool progress = false;
145 
146         mir_foreach_instr_in_block_safe(block, ins) {
147                 /* Search for a projection */
148                 if (ins->type != TAG_LOAD_STORE_4) continue;
149                 if (!OP_IS_PROJECTION(ins->op)) continue;
150 
151                 unsigned vary = ins->src[0];
152                 unsigned to = ins->dest;
153 
154                 if (vary & PAN_IS_REG) continue;
155                 if (to & PAN_IS_REG) continue;
156                 if (!mir_single_use(ctx, vary)) continue;
157 
158                 /* Check for a varying source. If we find it, we rewrite */
159 
160                 bool rewritten = false;
161 
162                 mir_foreach_instr_in_block_safe(block, v) {
163                         if (v->dest != vary) continue;
164                         if (v->type != TAG_LOAD_STORE_4) break;
165                         if (!OP_IS_LOAD_VARY_F(v->op)) break;
166 
167                         /* We found it, so rewrite it to project. Grab the
168                          * modifier */
169 
170                         midgard_varying_params p =
171                                 midgard_unpack_varying_params(v->load_store);
172 
173                         if (p.modifier != midgard_varying_mod_none)
174                                 break;
175 
176                         bool projects_w =
177                                 ins->op == midgard_op_ldst_perspective_div_w;
178 
179                         p.modifier = projects_w ?
180                                 midgard_varying_mod_perspective_w :
181                                 midgard_varying_mod_perspective_z;
182 
183                         midgard_pack_varying_params(&v->load_store, p);
184 
185                         /* Use the new destination */
186                         v->dest = to;
187 
188                         rewritten = true;
189                         break;
190                 }
191 
192                 if (rewritten)
193                         mir_remove_instruction(ins);
194 
195                 progress |= rewritten;
196         }
197 
198         return progress;
199 }
200