1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file opt_vectorize.cpp
26 *
27 * Combines scalar assignments of the same expression (modulo swizzle) to
28 * multiple channels of the same variable into a single vectorized expression
29 * and assignment.
30 *
31 * Many generated shaders contain scalarized code. That is, they contain
32 *
33 * r1.x = log2(v0.x);
34 * r1.y = log2(v0.y);
35 * r1.z = log2(v0.z);
36 *
37 * rather than
38 *
39 * r1.xyz = log2(v0.xyz);
40 *
41 * We look for consecutive assignments of the same expression (modulo swizzle)
42 * to each channel of the same variable.
43 *
44 * For instance, we want to convert these three scalar operations
45 *
46 * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
47 * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
48 * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
49 *
50 * into a single vector operation
51 *
52 * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
53 */
54
55 #include "ir.h"
56 #include "ir_visitor.h"
57 #include "ir_optimization.h"
58 #include "glsl_types.h"
59 #include "program/prog_instruction.h"
60
61 namespace {
62
63 class ir_vectorize_visitor : public ir_hierarchical_visitor {
64 public:
clear()65 void clear()
66 {
67 assignment[0] = NULL;
68 assignment[1] = NULL;
69 assignment[2] = NULL;
70 assignment[3] = NULL;
71 current_assignment = NULL;
72 last_assignment = NULL;
73 channels = 0;
74 has_swizzle = false;
75 }
76
ir_vectorize_visitor()77 ir_vectorize_visitor()
78 {
79 clear();
80 progress = false;
81 }
82
83 virtual ir_visitor_status visit_enter(ir_assignment *);
84 virtual ir_visitor_status visit_enter(ir_swizzle *);
85 virtual ir_visitor_status visit_enter(ir_dereference_array *);
86 virtual ir_visitor_status visit_enter(ir_expression *);
87 virtual ir_visitor_status visit_enter(ir_if *);
88 virtual ir_visitor_status visit_enter(ir_loop *);
89 virtual ir_visitor_status visit_enter(ir_texture *);
90
91 virtual ir_visitor_status visit_leave(ir_assignment *);
92
93
94 void try_vectorize();
95
96 ir_assignment *assignment[4];
97 ir_assignment *current_assignment, *last_assignment;
98 unsigned channels;
99 bool has_swizzle;
100
101 bool progress;
102 };
103
104 } /* unnamed namespace */
105
106 /**
107 * Rewrites the swizzles and types of a right-hand side of an assignment.
108 *
109 * From the example above, this function would be called (by visit_tree()) on
110 * the nodes of the tree (expression float log2 (swiz z (var_ref v0))),
111 * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))).
112 *
113 * The function operates on ir_expressions (and its operands) and ir_swizzles.
114 * For expressions it sets a new type and swizzles any non-expression and non-
115 * swizzle scalar operands into appropriately sized vector arguments. For
116 * example, if combining
117 *
118 * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1))))
119 * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1))))
120 *
121 * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on
122 * (var_ref v1) such that the final result was
123 *
124 * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
125 * (swiz xx (var_ref v1))))
126 *
127 * For swizzles, it sets a new type, and if the variable being swizzled is a
128 * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the
129 * data parameter. If the swizzled variable is scalar, then the swizzle was
130 * added by an earlier call to rewrite_swizzle() on an expression, so the
131 * mask should not be modified.
132 */
133 static void
rewrite_swizzle(ir_instruction * ir,void * data)134 rewrite_swizzle(ir_instruction *ir, void *data)
135 {
136 ir_swizzle_mask *mask = (ir_swizzle_mask *)data;
137
138 switch (ir->ir_type) {
139 case ir_type_swizzle: {
140 ir_swizzle *swz = (ir_swizzle *)ir;
141 if (swz->val->type->is_vector()) {
142 swz->mask = *mask;
143 }
144 swz->type = glsl_type::get_instance(swz->type->base_type,
145 mask->num_components, 1);
146 break;
147 }
148 case ir_type_expression: {
149 ir_expression *expr = (ir_expression *)ir;
150 expr->type = glsl_type::get_instance(expr->type->base_type,
151 mask->num_components, 1);
152 for (unsigned i = 0; i < 4; i++) {
153 if (expr->operands[i]) {
154 ir_rvalue *rval = expr->operands[i]->as_rvalue();
155 if (rval && rval->type->is_scalar() &&
156 !rval->as_expression() && !rval->as_swizzle()) {
157 expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0,
158 mask->num_components);
159 }
160 }
161 }
162 break;
163 }
164 default:
165 break;
166 }
167 }
168
169 /**
170 * Attempt to vectorize the previously saved assignments, and clear them from
171 * consideration.
172 *
173 * If the assignments are able to be combined, it modifies in-place the last
174 * assignment seen to be an equivalent vector form of the scalar assignments.
175 * It then removes the other now obsolete scalar assignments.
176 */
177 void
try_vectorize()178 ir_vectorize_visitor::try_vectorize()
179 {
180 if (this->last_assignment && this->channels > 1) {
181 ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0};
182
183 this->last_assignment->write_mask = 0;
184
185 for (unsigned i = 0, j = 0; i < 4; i++) {
186 if (this->assignment[i]) {
187 this->last_assignment->write_mask |= 1 << i;
188
189 if (this->assignment[i] != this->last_assignment) {
190 this->assignment[i]->remove();
191 }
192
193 switch (j) {
194 case 0: mask.x = i; break;
195 case 1: mask.y = i; break;
196 case 2: mask.z = i; break;
197 case 3: mask.w = i; break;
198 }
199
200 j++;
201 }
202 }
203
204 visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
205
206 this->progress = true;
207 }
208 clear();
209 }
210
/**
 * Tells whether exactly one bit is set in \c write_mask, i.e. whether the
 * mask selects a single channel.
 */
static bool
single_channel_write_mask(unsigned write_mask)
{
   if (write_mask == 0)
      return false;

   /* Isolate the lowest set bit; a one-bit mask is equal to it. */
   const unsigned lowest_bit = write_mask & (0u - write_mask);
   return lowest_bit == write_mask;
}
219
220 /**
221 * Translates single-channeled write mask to single-channeled swizzle.
222 */
223 static unsigned
write_mask_to_swizzle(unsigned write_mask)224 write_mask_to_swizzle(unsigned write_mask)
225 {
226 switch (write_mask) {
227 case WRITEMASK_X: return SWIZZLE_X;
228 case WRITEMASK_Y: return SWIZZLE_Y;
229 case WRITEMASK_Z: return SWIZZLE_Z;
230 case WRITEMASK_W: break;
231 }
232 return SWIZZLE_W;
233 }
234
235 /**
236 * Returns whether a single-channeled write mask matches a swizzle.
237 */
238 static bool
write_mask_matches_swizzle(unsigned write_mask,const ir_swizzle * swz)239 write_mask_matches_swizzle(unsigned write_mask,
240 const ir_swizzle *swz)
241 {
242 return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) ||
243 (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) ||
244 (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) ||
245 (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W));
246 }
247
248 /**
249 * Upon entering an ir_assignment, attempt to vectorize the currently tracked
250 * assignments if the current assignment is not suitable. Keep a pointer to
251 * the current assignment.
252 */
253 ir_visitor_status
visit_enter(ir_assignment * ir)254 ir_vectorize_visitor::visit_enter(ir_assignment *ir)
255 {
256 ir_dereference *lhs = this->last_assignment != NULL ?
257 this->last_assignment->lhs : NULL;
258 ir_rvalue *rhs = this->last_assignment != NULL ?
259 this->last_assignment->rhs : NULL;
260
261 if (ir->condition ||
262 this->channels >= 4 ||
263 !single_channel_write_mask(ir->write_mask) ||
264 this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL ||
265 (lhs && !ir->lhs->equals(lhs)) ||
266 (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
267 try_vectorize();
268 }
269
270 this->current_assignment = ir;
271
272 return visit_continue;
273 }
274
275 /**
276 * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
277 * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask
278 * matches the current assignment's write mask.
279 *
280 * If the write mask doesn't match the swizzle mask, remove the current
281 * assignment from further consideration.
282 */
283 ir_visitor_status
visit_enter(ir_swizzle * ir)284 ir_vectorize_visitor::visit_enter(ir_swizzle *ir)
285 {
286 if (this->current_assignment) {
287 if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) {
288 this->has_swizzle = true;
289 } else {
290 this->current_assignment = NULL;
291 }
292 }
293 return visit_continue;
294 }
295
296 /* Upon entering an ir_array_dereference, remove the current assignment from
297 * further consideration. Since the index of an array dereference must scalar,
298 * we are not able to vectorize it.
299 *
300 * FINISHME: If all of scalar indices are identical we could vectorize.
301 */
302 ir_visitor_status
visit_enter(ir_dereference_array *)303 ir_vectorize_visitor::visit_enter(ir_dereference_array *)
304 {
305 this->current_assignment = NULL;
306 return visit_continue_with_parent;
307 }
308
309 /**
310 * Upon entering an ir_expression, remove the current assignment from further
311 * consideration if the expression operates horizontally on vectors.
312 */
313 ir_visitor_status
visit_enter(ir_expression * ir)314 ir_vectorize_visitor::visit_enter(ir_expression *ir)
315 {
316 if (ir->is_horizontal()) {
317 this->current_assignment = NULL;
318 return visit_continue_with_parent;
319 }
320 return visit_continue;
321 }
322
323 /* Since there is no statement to visit between the "then" and "else"
324 * instructions try to vectorize before, in between, and after them to avoid
325 * combining statements from different basic blocks.
326 */
327 ir_visitor_status
visit_enter(ir_if * ir)328 ir_vectorize_visitor::visit_enter(ir_if *ir)
329 {
330 try_vectorize();
331
332 visit_list_elements(this, &ir->then_instructions);
333 try_vectorize();
334
335 visit_list_elements(this, &ir->else_instructions);
336 try_vectorize();
337
338 return visit_continue_with_parent;
339 }
340
341 /* Since there is no statement to visit between the instructions in the body of
342 * the loop and the instructions after it try to vectorize before and after the
343 * body to avoid combining statements from different basic blocks.
344 */
345 ir_visitor_status
visit_enter(ir_loop * ir)346 ir_vectorize_visitor::visit_enter(ir_loop *ir)
347 {
348 try_vectorize();
349
350 visit_list_elements(this, &ir->body_instructions);
351 try_vectorize();
352
353 return visit_continue_with_parent;
354 }
355
356 /**
357 * Upon entering an ir_texture, remove the current assignment from
358 * further consideration. Vectorizing multiple texture lookups into one
359 * is wrong.
360 */
361 ir_visitor_status
visit_enter(ir_texture *)362 ir_vectorize_visitor::visit_enter(ir_texture *)
363 {
364 this->current_assignment = NULL;
365 return visit_continue_with_parent;
366 }
367
368 /**
369 * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
370 * the swizzle mask(s) found were appropriate. Also save a pointer in
371 * ::last_assignment so that we can compare future assignments with it.
372 *
373 * Finally, clear ::current_assignment and ::has_swizzle.
374 */
375 ir_visitor_status
visit_leave(ir_assignment * ir)376 ir_vectorize_visitor::visit_leave(ir_assignment *ir)
377 {
378 if (this->has_swizzle && this->current_assignment) {
379 assert(this->current_assignment == ir);
380
381 unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask);
382 this->assignment[channel] = ir;
383 this->channels++;
384
385 this->last_assignment = this->current_assignment;
386 }
387 this->current_assignment = NULL;
388 this->has_swizzle = false;
389 return visit_continue;
390 }
391
392 /**
393 * Combines scalar assignments of the same expression (modulo swizzle) to
394 * multiple channels of the same variable into a single vectorized expression
395 * and assignment.
396 */
397 bool
do_vectorize(exec_list * instructions)398 do_vectorize(exec_list *instructions)
399 {
400 ir_vectorize_visitor v;
401
402 v.run(instructions);
403
404 /* Try to vectorize the last assignments seen. */
405 v.try_vectorize();
406
407 return v.progress;
408 }
409