1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file opt_vectorize.cpp
26  *
27  * Combines scalar assignments of the same expression (modulo swizzle) to
28  * multiple channels of the same variable into a single vectorized expression
29  * and assignment.
30  *
31  * Many generated shaders contain scalarized code. That is, they contain
32  *
33  * r1.x = log2(v0.x);
34  * r1.y = log2(v0.y);
35  * r1.z = log2(v0.z);
36  *
37  * rather than
38  *
39  * r1.xyz = log2(v0.xyz);
40  *
41  * We look for consecutive assignments of the same expression (modulo swizzle)
42  * to each channel of the same variable.
43  *
44  * For instance, we want to convert these three scalar operations
45  *
46  * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
47  * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
48  * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
49  *
50  * into a single vector operation
51  *
52  * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
53  */
54 
55 #include "ir.h"
56 #include "ir_visitor.h"
57 #include "ir_optimization.h"
58 #include "compiler/glsl_types.h"
59 #include "program/prog_instruction.h"
60 
61 namespace {
62 
63 class ir_vectorize_visitor : public ir_hierarchical_visitor {
64 public:
clear()65    void clear()
66    {
67       assignment[0] = NULL;
68       assignment[1] = NULL;
69       assignment[2] = NULL;
70       assignment[3] = NULL;
71       current_assignment = NULL;
72       last_assignment = NULL;
73       channels = 0;
74       has_swizzle = false;
75    }
76 
ir_vectorize_visitor()77    ir_vectorize_visitor()
78    {
79       clear();
80       progress = false;
81    }
82 
83    virtual ir_visitor_status visit_enter(ir_assignment *);
84    virtual ir_visitor_status visit_enter(ir_swizzle *);
85    virtual ir_visitor_status visit_enter(ir_dereference_array *);
86    virtual ir_visitor_status visit_enter(ir_expression *);
87    virtual ir_visitor_status visit_enter(ir_if *);
88    virtual ir_visitor_status visit_enter(ir_loop *);
89    virtual ir_visitor_status visit_enter(ir_texture *);
90 
91    virtual ir_visitor_status visit_leave(ir_assignment *);
92 
93    void try_vectorize();
94 
95    ir_assignment *assignment[4];
96    ir_assignment *current_assignment, *last_assignment;
97    unsigned channels;
98    bool has_swizzle;
99 
100    bool progress;
101 };
102 
103 } /* unnamed namespace */
104 
105 /**
106  * Rewrites the swizzles and types of a right-hand side of an assignment.
107  *
108  * From the example above, this function would be called (by visit_tree()) on
109  * the nodes of the tree (expression float log2 (swiz z   (var_ref v0))),
110  * rewriting it into     (expression vec3  log2 (swiz xyz (var_ref v0))).
111  *
112  * The function operates on ir_expressions (and its operands) and ir_swizzles.
113  * For expressions it sets a new type and swizzles any non-expression and non-
114  * swizzle scalar operands into appropriately sized vector arguments. For
115  * example, if combining
116  *
117  * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1))))
118  * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1))))
119  *
120  * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on
121  * (var_ref v1) such that the final result was
122  *
123  * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
124  *                                              (swiz xx (var_ref v1))))
125  *
126  * For swizzles, it sets a new type, and if the variable being swizzled is a
127  * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the
128  * data parameter. If the swizzled variable is scalar, then the swizzle was
129  * added by an earlier call to rewrite_swizzle() on an expression, so the
130  * mask should not be modified.
131  */
132 static void
rewrite_swizzle(ir_instruction * ir,void * data)133 rewrite_swizzle(ir_instruction *ir, void *data)
134 {
135    ir_swizzle_mask *mask = (ir_swizzle_mask *)data;
136 
137    switch (ir->ir_type) {
138    case ir_type_swizzle: {
139       ir_swizzle *swz = (ir_swizzle *)ir;
140       if (swz->val->type->is_vector()) {
141          swz->mask = *mask;
142       }
143       swz->type = glsl_type::get_instance(swz->type->base_type,
144                                           mask->num_components, 1);
145       break;
146    }
147    case ir_type_expression: {
148       ir_expression *expr = (ir_expression *)ir;
149       expr->type = glsl_type::get_instance(expr->type->base_type,
150                                            mask->num_components, 1);
151       for (unsigned i = 0; i < 4; i++) {
152          if (expr->operands[i]) {
153             ir_rvalue *rval = expr->operands[i]->as_rvalue();
154             if (rval && rval->type->is_scalar() &&
155                 !rval->as_expression() && !rval->as_swizzle()) {
156                expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0,
157                                                       mask->num_components);
158             }
159          }
160       }
161       break;
162    }
163    default:
164       break;
165    }
166 }
167 
168 /**
169  * Attempt to vectorize the previously saved assignments, and clear them from
170  * consideration.
171  *
172  * If the assignments are able to be combined, it modifies in-place the last
173  * assignment seen to be an equivalent vector form of the scalar assignments.
174  * It then removes the other now obsolete scalar assignments.
175  */
176 void
try_vectorize()177 ir_vectorize_visitor::try_vectorize()
178 {
179    if (this->last_assignment && this->channels > 1) {
180       ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0};
181 
182       this->last_assignment->write_mask = 0;
183 
184       for (unsigned i = 0, j = 0; i < 4; i++) {
185          if (this->assignment[i]) {
186             this->last_assignment->write_mask |= 1 << i;
187 
188             if (this->assignment[i] != this->last_assignment) {
189                this->assignment[i]->remove();
190             }
191 
192             switch (j) {
193             case 0: mask.x = i; break;
194             case 1: mask.y = i; break;
195             case 2: mask.z = i; break;
196             case 3: mask.w = i; break;
197             }
198 
199             j++;
200          }
201       }
202 
203       visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
204 
205       this->progress = true;
206    }
207    clear();
208 }
209 
210 /**
211  * Returns whether the write mask is a single channel.
212  */
213 static bool
single_channel_write_mask(unsigned write_mask)214 single_channel_write_mask(unsigned write_mask)
215 {
216    return write_mask != 0 && (write_mask & (write_mask - 1)) == 0;
217 }
218 
219 /**
220  * Translates single-channeled write mask to single-channeled swizzle.
221  */
222 static unsigned
write_mask_to_swizzle(unsigned write_mask)223 write_mask_to_swizzle(unsigned write_mask)
224 {
225    switch (write_mask) {
226    case WRITEMASK_X: return SWIZZLE_X;
227    case WRITEMASK_Y: return SWIZZLE_Y;
228    case WRITEMASK_Z: return SWIZZLE_Z;
229    case WRITEMASK_W: return SWIZZLE_W;
230    }
231    unreachable("not reached");
232 }
233 
234 /**
235  * Returns whether a single-channeled write mask matches a swizzle.
236  */
237 static bool
write_mask_matches_swizzle(unsigned write_mask,const ir_swizzle * swz)238 write_mask_matches_swizzle(unsigned write_mask,
239                            const ir_swizzle *swz)
240 {
241    return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) ||
242            (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) ||
243            (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) ||
244            (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W));
245 }
246 
247 /**
248  * Upon entering an ir_assignment, attempt to vectorize the currently tracked
249  * assignments if the current assignment is not suitable. Keep a pointer to
250  * the current assignment.
251  */
252 ir_visitor_status
visit_enter(ir_assignment * ir)253 ir_vectorize_visitor::visit_enter(ir_assignment *ir)
254 {
255    ir_dereference *lhs = this->last_assignment != NULL ?
256                          this->last_assignment->lhs : NULL;
257    ir_rvalue *rhs = this->last_assignment != NULL ?
258                     this->last_assignment->rhs : NULL;
259 
260    if (ir->condition ||
261        this->channels >= 4 ||
262        !single_channel_write_mask(ir->write_mask) ||
263        this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL ||
264        (lhs && !ir->lhs->equals(lhs)) ||
265        (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
266       try_vectorize();
267    }
268 
269    this->current_assignment = ir;
270 
271    return visit_continue;
272 }
273 
274 /**
275  * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
276  * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask
277  * matches the current assignment's write mask.
278  *
279  * If the write mask doesn't match the swizzle mask, remove the current
280  * assignment from further consideration.
281  */
282 ir_visitor_status
visit_enter(ir_swizzle * ir)283 ir_vectorize_visitor::visit_enter(ir_swizzle *ir)
284 {
285    if (this->current_assignment) {
286       if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) {
287          this->has_swizzle = true;
288       } else {
289          this->current_assignment = NULL;
290       }
291    }
292    return visit_continue;
293 }
294 
295 /* Upon entering an ir_array_dereference, remove the current assignment from
296  * further consideration. Since the index of an array dereference must scalar,
297  * we are not able to vectorize it.
298  *
299  * FINISHME: If all of scalar indices are identical we could vectorize.
300  */
301 ir_visitor_status
visit_enter(ir_dereference_array *)302 ir_vectorize_visitor::visit_enter(ir_dereference_array *)
303 {
304    this->current_assignment = NULL;
305    return visit_continue_with_parent;
306 }
307 
308 /**
309  * Upon entering an ir_expression, remove the current assignment from further
310  * consideration if the expression operates horizontally on vectors.
311  */
312 ir_visitor_status
visit_enter(ir_expression * ir)313 ir_vectorize_visitor::visit_enter(ir_expression *ir)
314 {
315    if (ir->is_horizontal()) {
316       this->current_assignment = NULL;
317       return visit_continue_with_parent;
318    }
319    return visit_continue;
320 }
321 
322 /* Since there is no statement to visit between the "then" and "else"
323  * instructions try to vectorize before, in between, and after them to avoid
324  * combining statements from different basic blocks.
325  */
326 ir_visitor_status
visit_enter(ir_if * ir)327 ir_vectorize_visitor::visit_enter(ir_if *ir)
328 {
329    try_vectorize();
330 
331    visit_list_elements(this, &ir->then_instructions);
332    try_vectorize();
333 
334    visit_list_elements(this, &ir->else_instructions);
335    try_vectorize();
336 
337    return visit_continue_with_parent;
338 }
339 
340 /* Since there is no statement to visit between the instructions in the body of
341  * the loop and the instructions after it try to vectorize before and after the
342  * body to avoid combining statements from different basic blocks.
343  */
344 ir_visitor_status
visit_enter(ir_loop * ir)345 ir_vectorize_visitor::visit_enter(ir_loop *ir)
346 {
347    try_vectorize();
348 
349    visit_list_elements(this, &ir->body_instructions);
350    try_vectorize();
351 
352    return visit_continue_with_parent;
353 }
354 
355 /**
356  * Upon entering an ir_texture, remove the current assignment from
357  * further consideration. Vectorizing multiple texture lookups into one
358  * is wrong.
359  */
360 ir_visitor_status
visit_enter(ir_texture *)361 ir_vectorize_visitor::visit_enter(ir_texture *)
362 {
363    this->current_assignment = NULL;
364    return visit_continue_with_parent;
365 }
366 
367 /**
368  * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
369  * the swizzle mask(s) found were appropriate. Also save a pointer in
370  * ::last_assignment so that we can compare future assignments with it.
371  *
372  * Finally, clear ::current_assignment and ::has_swizzle.
373  */
374 ir_visitor_status
visit_leave(ir_assignment * ir)375 ir_vectorize_visitor::visit_leave(ir_assignment *ir)
376 {
377    if (this->has_swizzle && this->current_assignment) {
378       assert(this->current_assignment == ir);
379 
380       unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask);
381       this->assignment[channel] = ir;
382       this->channels++;
383 
384       this->last_assignment = this->current_assignment;
385    }
386    this->current_assignment = NULL;
387    this->has_swizzle = false;
388    return visit_continue;
389 }
390 
391 /**
392  * Combines scalar assignments of the same expression (modulo swizzle) to
393  * multiple channels of the same variable into a single vectorized expression
394  * and assignment.
395  */
396 bool
do_vectorize(exec_list * instructions)397 do_vectorize(exec_list *instructions)
398 {
399    ir_vectorize_visitor v;
400 
401    v.run(instructions);
402 
403    /* Try to vectorize the last assignments seen. */
404    v.try_vectorize();
405 
406    return v.progress;
407 }
408