1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file lower_vector.cpp
26  * IR lowering pass to remove some types of ir_quadop_vector
27  *
28  * \author Ian Romanick <ian.d.romanick@intel.com>
29  */
30 
31 #include "ir.h"
32 #include "ir_rvalue_visitor.h"
33 
34 namespace {
35 
36 class lower_vector_visitor : public ir_rvalue_visitor {
37 public:
lower_vector_visitor()38    lower_vector_visitor() : dont_lower_swz(false), progress(false)
39    {
40       /* empty */
41    }
42 
43    void handle_rvalue(ir_rvalue **rvalue);
44 
45    /**
46     * Should SWZ-like expressions be lowered?
47     */
48    bool dont_lower_swz;
49 
50    bool progress;
51 };
52 
53 } /* anonymous namespace */
54 
55 /**
56  * Determine if an IR expression tree looks like an extended swizzle
57  *
58  * Extended swizzles consist of access of a single vector source (with possible
59  * per component negation) and the constants -1, 0, or 1.
60  */
61 static bool
is_extended_swizzle(ir_expression * ir)62 is_extended_swizzle(ir_expression *ir)
63 {
64    /* Track any variables that are accessed by this expression.
65     */
66    ir_variable *var = NULL;
67 
68    assert(ir->operation == ir_quadop_vector);
69 
70    for (unsigned i = 0; i < ir->type->vector_elements; i++) {
71       ir_rvalue *op = ir->operands[i];
72 
73       while (op != NULL) {
74 	 switch (op->ir_type) {
75 	 case ir_type_constant: {
76 	    const ir_constant *const c = op->as_constant();
77 
78 	    if (!c->is_one() && !c->is_zero() && !c->is_negative_one())
79 	       return false;
80 
81 	    op = NULL;
82 	    break;
83 	 }
84 
85 	 case ir_type_dereference_variable: {
86 	    ir_dereference_variable *const d = (ir_dereference_variable *) op;
87 
88 	    if ((var != NULL) && (var != d->var))
89 	       return false;
90 
91 	    var = d->var;
92 	    op = NULL;
93 	    break;
94 	 }
95 
96 	 case ir_type_expression: {
97 	    ir_expression *const ex = (ir_expression *) op;
98 
99 	    if (ex->operation != ir_unop_neg)
100 	       return false;
101 
102 	    op = ex->operands[0];
103 	    break;
104 	 }
105 
106 	 case ir_type_swizzle:
107 	    op = ((ir_swizzle *) op)->val;
108 	    break;
109 
110 	 default:
111 	    return false;
112 	 }
113       }
114    }
115 
116    return true;
117 }
118 
119 void
handle_rvalue(ir_rvalue ** rvalue)120 lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue)
121 {
122    if (!*rvalue)
123       return;
124 
125    ir_expression *expr = (*rvalue)->as_expression();
126    if ((expr == NULL) || (expr->operation != ir_quadop_vector))
127       return;
128 
129    if (this->dont_lower_swz && is_extended_swizzle(expr))
130       return;
131 
132    /* FINISHME: Is this the right thing to use for the ralloc context?
133     */
134    void *const mem_ctx = expr;
135 
136    assert(expr->type->vector_elements == expr->num_operands);
137 
138    /* Generate a temporary with the same type as the ir_quadop_operation.
139     */
140    ir_variable *const temp =
141       new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary);
142 
143    this->base_ir->insert_before(temp);
144 
145    /* Counter of the number of components collected so far.
146     */
147    unsigned assigned;
148 
149    /* Write-mask in the destination that receives counted by 'assigned'.
150     */
151    unsigned write_mask;
152 
153 
154    /* Generate upto four assignments to that variable.  Try to group component
155     * assignments together:
156     *
157     * - All constant components can be assigned at once.
158     * - All assigments of components from a single variable with the same
159     *   unary operator can be assigned at once.
160     */
161    ir_constant_data d = { { 0 } };
162 
163    assigned = 0;
164    write_mask = 0;
165    for (unsigned i = 0; i < expr->type->vector_elements; i++) {
166       const ir_constant *const c = expr->operands[i]->as_constant();
167 
168       if (c == NULL)
169 	 continue;
170 
171       switch (expr->type->base_type) {
172       case GLSL_TYPE_UINT:  d.u[assigned] = c->value.u[0]; break;
173       case GLSL_TYPE_INT:   d.i[assigned] = c->value.i[0]; break;
174       case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break;
175       case GLSL_TYPE_BOOL:  d.b[assigned] = c->value.b[0]; break;
176       default:              assert(!"Should not get here."); break;
177       }
178 
179       write_mask |= (1U << i);
180       assigned++;
181    }
182 
183    assert((write_mask == 0) == (assigned == 0));
184 
185    /* If there were constant values, generate an assignment.
186     */
187    if (assigned > 0) {
188       ir_constant *const c =
189 	 new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type,
190 							  assigned, 1),
191 				  &d);
192       ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
193       ir_assignment *const assign =
194 	 new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask);
195 
196       this->base_ir->insert_before(assign);
197    }
198 
199    /* FINISHME: This should try to coalesce assignments.
200     */
201    for (unsigned i = 0; i < expr->type->vector_elements; i++) {
202       if (expr->operands[i]->ir_type == ir_type_constant)
203 	 continue;
204 
205       ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
206       ir_assignment *const assign =
207 	 new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i));
208 
209       this->base_ir->insert_before(assign);
210       assigned++;
211    }
212 
213    assert(assigned == expr->type->vector_elements);
214 
215    *rvalue = new(mem_ctx) ir_dereference_variable(temp);
216    this->progress = true;
217 }
218 
219 bool
lower_quadop_vector(exec_list * instructions,bool dont_lower_swz)220 lower_quadop_vector(exec_list *instructions, bool dont_lower_swz)
221 {
222    lower_vector_visitor v;
223 
224    v.dont_lower_swz = dont_lower_swz;
225    visit_list_elements(&v, instructions);
226 
227    return v.progress;
228 }
229