1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file opt_vectorize.cpp
26  *
27  * Combines scalar assignments of the same expression (modulo swizzle) to
28  * multiple channels of the same variable into a single vectorized expression
29  * and assignment.
30  *
31  * Many generated shaders contain scalarized code. That is, they contain
32  *
33  * r1.x = log2(v0.x);
34  * r1.y = log2(v0.y);
35  * r1.z = log2(v0.z);
36  *
37  * rather than
38  *
39  * r1.xyz = log2(v0.xyz);
40  *
41  * We look for consecutive assignments of the same expression (modulo swizzle)
42  * to each channel of the same variable.
43  *
44  * For instance, we want to convert these three scalar operations
45  *
46  * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
47  * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
48  * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
49  *
50  * into a single vector operation
51  *
52  * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
53  */
54 
55 #include "ir.h"
56 #include "ir_visitor.h"
57 #include "ir_optimization.h"
58 #include "glsl_types.h"
59 #include "program/prog_instruction.h"
60 
61 namespace {
62 
63 class ir_vectorize_visitor : public ir_hierarchical_visitor {
64 public:
clear()65    void clear()
66    {
67       assignment[0] = NULL;
68       assignment[1] = NULL;
69       assignment[2] = NULL;
70       assignment[3] = NULL;
71       current_assignment = NULL;
72       last_assignment = NULL;
73       channels = 0;
74       has_swizzle = false;
75    }
76 
ir_vectorize_visitor()77    ir_vectorize_visitor()
78    {
79       clear();
80       progress = false;
81    }
82 
83    virtual ir_visitor_status visit_enter(ir_assignment *);
84    virtual ir_visitor_status visit_enter(ir_swizzle *);
85    virtual ir_visitor_status visit_enter(ir_dereference_array *);
86    virtual ir_visitor_status visit_enter(ir_expression *);
87    virtual ir_visitor_status visit_enter(ir_if *);
88    virtual ir_visitor_status visit_enter(ir_loop *);
89    virtual ir_visitor_status visit_enter(ir_texture *);
90 
91    virtual ir_visitor_status visit_leave(ir_assignment *);
92 
93 
94    void try_vectorize();
95 
96    ir_assignment *assignment[4];
97    ir_assignment *current_assignment, *last_assignment;
98    unsigned channels;
99    bool has_swizzle;
100 
101    bool progress;
102 };
103 
104 } /* unnamed namespace */
105 
106 /**
107  * Rewrites the swizzles and types of a right-hand side of an assignment.
108  *
109  * From the example above, this function would be called (by visit_tree()) on
110  * the nodes of the tree (expression float log2 (swiz z   (var_ref v0))),
111  * rewriting it into     (expression vec3  log2 (swiz xyz (var_ref v0))).
112  *
113  * The function operates on ir_expressions (and its operands) and ir_swizzles.
114  * For expressions it sets a new type and swizzles any non-expression and non-
115  * swizzle scalar operands into appropriately sized vector arguments. For
116  * example, if combining
117  *
118  * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1))))
119  * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1))))
120  *
121  * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on
122  * (var_ref v1) such that the final result was
123  *
124  * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
125  *                                              (swiz xx (var_ref v1))))
126  *
127  * For swizzles, it sets a new type, and if the variable being swizzled is a
128  * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the
129  * data parameter. If the swizzled variable is scalar, then the swizzle was
130  * added by an earlier call to rewrite_swizzle() on an expression, so the
131  * mask should not be modified.
132  */
133 static void
rewrite_swizzle(ir_instruction * ir,void * data)134 rewrite_swizzle(ir_instruction *ir, void *data)
135 {
136    ir_swizzle_mask *mask = (ir_swizzle_mask *)data;
137 
138    switch (ir->ir_type) {
139    case ir_type_swizzle: {
140       ir_swizzle *swz = (ir_swizzle *)ir;
141       if (swz->val->type->is_vector()) {
142          swz->mask = *mask;
143       }
144       swz->type = glsl_type::get_instance(swz->type->base_type,
145                                           mask->num_components, 1);
146       break;
147    }
148    case ir_type_expression: {
149       ir_expression *expr = (ir_expression *)ir;
150       expr->type = glsl_type::get_instance(expr->type->base_type,
151                                            mask->num_components, 1);
152       for (unsigned i = 0; i < 4; i++) {
153          if (expr->operands[i]) {
154             ir_rvalue *rval = expr->operands[i]->as_rvalue();
155             if (rval && rval->type->is_scalar() &&
156                 !rval->as_expression() && !rval->as_swizzle()) {
157                expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0,
158                                                       mask->num_components);
159             }
160          }
161       }
162       break;
163    }
164    default:
165       break;
166    }
167 }
168 
169 /**
170  * Attempt to vectorize the previously saved assignments, and clear them from
171  * consideration.
172  *
173  * If the assignments are able to be combined, it modifies in-place the last
174  * assignment seen to be an equivalent vector form of the scalar assignments.
175  * It then removes the other now obsolete scalar assignments.
176  */
177 void
try_vectorize()178 ir_vectorize_visitor::try_vectorize()
179 {
180    if (this->last_assignment && this->channels > 1) {
181       ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0};
182 
183       this->last_assignment->write_mask = 0;
184 
185       for (unsigned i = 0, j = 0; i < 4; i++) {
186          if (this->assignment[i]) {
187             this->last_assignment->write_mask |= 1 << i;
188 
189             if (this->assignment[i] != this->last_assignment) {
190                this->assignment[i]->remove();
191             }
192 
193             switch (j) {
194             case 0: mask.x = i; break;
195             case 1: mask.y = i; break;
196             case 2: mask.z = i; break;
197             case 3: mask.w = i; break;
198             }
199 
200             j++;
201          }
202       }
203 
204       visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
205 
206       this->progress = true;
207    }
208    clear();
209 }
210 
/**
 * Returns true when exactly one bit of the write mask is set.
 */
static bool
single_channel_write_mask(unsigned write_mask)
{
   if (write_mask == 0)
      return false;

   /* Isolate the lowest set bit; the mask is a single channel exactly when
    * that bit is the whole mask.
    */
   const unsigned lowest_bit = write_mask & (0u - write_mask);
   return lowest_bit == write_mask;
}
219 
220 /**
221  * Translates single-channeled write mask to single-channeled swizzle.
222  */
223 static unsigned
write_mask_to_swizzle(unsigned write_mask)224 write_mask_to_swizzle(unsigned write_mask)
225 {
226    switch (write_mask) {
227    case WRITEMASK_X: return SWIZZLE_X;
228    case WRITEMASK_Y: return SWIZZLE_Y;
229    case WRITEMASK_Z: return SWIZZLE_Z;
230    case WRITEMASK_W: break;
231    }
232    return SWIZZLE_W;
233 }
234 
235 /**
236  * Returns whether a single-channeled write mask matches a swizzle.
237  */
238 static bool
write_mask_matches_swizzle(unsigned write_mask,const ir_swizzle * swz)239 write_mask_matches_swizzle(unsigned write_mask,
240                            const ir_swizzle *swz)
241 {
242    return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) ||
243            (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) ||
244            (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) ||
245            (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W));
246 }
247 
248 /**
249  * Upon entering an ir_assignment, attempt to vectorize the currently tracked
250  * assignments if the current assignment is not suitable. Keep a pointer to
251  * the current assignment.
252  */
253 ir_visitor_status
visit_enter(ir_assignment * ir)254 ir_vectorize_visitor::visit_enter(ir_assignment *ir)
255 {
256    ir_dereference *lhs = this->last_assignment != NULL ?
257                          this->last_assignment->lhs : NULL;
258    ir_rvalue *rhs = this->last_assignment != NULL ?
259                     this->last_assignment->rhs : NULL;
260 
261    if (ir->condition ||
262        this->channels >= 4 ||
263        !single_channel_write_mask(ir->write_mask) ||
264        this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL ||
265        (lhs && !ir->lhs->equals(lhs)) ||
266        (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
267       try_vectorize();
268    }
269 
270    this->current_assignment = ir;
271 
272    return visit_continue;
273 }
274 
275 /**
276  * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
277  * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask
278  * matches the current assignment's write mask.
279  *
280  * If the write mask doesn't match the swizzle mask, remove the current
281  * assignment from further consideration.
282  */
283 ir_visitor_status
visit_enter(ir_swizzle * ir)284 ir_vectorize_visitor::visit_enter(ir_swizzle *ir)
285 {
286    if (this->current_assignment) {
287       if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) {
288          this->has_swizzle = true;
289       } else {
290          this->current_assignment = NULL;
291       }
292    }
293    return visit_continue;
294 }
295 
296 /* Upon entering an ir_array_dereference, remove the current assignment from
297  * further consideration. Since the index of an array dereference must scalar,
298  * we are not able to vectorize it.
299  *
300  * FINISHME: If all of scalar indices are identical we could vectorize.
301  */
302 ir_visitor_status
visit_enter(ir_dereference_array *)303 ir_vectorize_visitor::visit_enter(ir_dereference_array *)
304 {
305    this->current_assignment = NULL;
306    return visit_continue_with_parent;
307 }
308 
309 /**
310  * Upon entering an ir_expression, remove the current assignment from further
311  * consideration if the expression operates horizontally on vectors.
312  */
313 ir_visitor_status
visit_enter(ir_expression * ir)314 ir_vectorize_visitor::visit_enter(ir_expression *ir)
315 {
316    if (ir->is_horizontal()) {
317       this->current_assignment = NULL;
318       return visit_continue_with_parent;
319    }
320    return visit_continue;
321 }
322 
323 /* Since there is no statement to visit between the "then" and "else"
324  * instructions try to vectorize before, in between, and after them to avoid
325  * combining statements from different basic blocks.
326  */
327 ir_visitor_status
visit_enter(ir_if * ir)328 ir_vectorize_visitor::visit_enter(ir_if *ir)
329 {
330    try_vectorize();
331 
332    visit_list_elements(this, &ir->then_instructions);
333    try_vectorize();
334 
335    visit_list_elements(this, &ir->else_instructions);
336    try_vectorize();
337 
338    return visit_continue_with_parent;
339 }
340 
341 /* Since there is no statement to visit between the instructions in the body of
342  * the loop and the instructions after it try to vectorize before and after the
343  * body to avoid combining statements from different basic blocks.
344  */
345 ir_visitor_status
visit_enter(ir_loop * ir)346 ir_vectorize_visitor::visit_enter(ir_loop *ir)
347 {
348    try_vectorize();
349 
350    visit_list_elements(this, &ir->body_instructions);
351    try_vectorize();
352 
353    return visit_continue_with_parent;
354 }
355 
356 /**
357  * Upon entering an ir_texture, remove the current assignment from
358  * further consideration. Vectorizing multiple texture lookups into one
359  * is wrong.
360  */
361 ir_visitor_status
visit_enter(ir_texture *)362 ir_vectorize_visitor::visit_enter(ir_texture *)
363 {
364    this->current_assignment = NULL;
365    return visit_continue_with_parent;
366 }
367 
368 /**
369  * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
370  * the swizzle mask(s) found were appropriate. Also save a pointer in
371  * ::last_assignment so that we can compare future assignments with it.
372  *
373  * Finally, clear ::current_assignment and ::has_swizzle.
374  */
375 ir_visitor_status
visit_leave(ir_assignment * ir)376 ir_vectorize_visitor::visit_leave(ir_assignment *ir)
377 {
378    if (this->has_swizzle && this->current_assignment) {
379       assert(this->current_assignment == ir);
380 
381       unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask);
382       this->assignment[channel] = ir;
383       this->channels++;
384 
385       this->last_assignment = this->current_assignment;
386    }
387    this->current_assignment = NULL;
388    this->has_swizzle = false;
389    return visit_continue;
390 }
391 
392 /**
393  * Combines scalar assignments of the same expression (modulo swizzle) to
394  * multiple channels of the same variable into a single vectorized expression
395  * and assignment.
396  */
397 bool
do_vectorize(exec_list * instructions)398 do_vectorize(exec_list *instructions)
399 {
400    ir_vectorize_visitor v;
401 
402    v.run(instructions);
403 
404    /* Try to vectorize the last assignments seen. */
405    v.try_vectorize();
406 
407    return v.progress;
408 }
409