1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file lower_int64.cpp
26  *
27  * Lower 64-bit operations to 32-bit operations.  Each 64-bit value is lowered
28  * to a uvec2.  For each operation that can be lowered, there is a function
29  * called __builtin_foo with the same number of parameters that takes uvec2
30  * sources and produces uvec2 results.  An operation like
31  *
32  *     uint64_t(x) * uint64_t(y)
33  *
34  * becomes
35  *
36  *     packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
37  */
38 
39 #include "main/macros.h"
40 #include "compiler/glsl_types.h"
41 #include "ir.h"
42 #include "ir_rvalue_visitor.h"
43 #include "ir_builder.h"
44 #include "ir_optimization.h"
45 #include "util/hash_table.h"
46 #include "builtin_functions.h"
47 
/** Signature of the helpers in builtin_functions.cpp (e.g. generate_ir::udiv64)
 * that build an ir_function_signature for one 64-bit lowering operation.
 */
typedef ir_function_signature *(*function_generator)(void *mem_ctx,
                                                     builtin_available_predicate avail);

using namespace ir_builder;

namespace lower_64bit {
/** Unpack each component of a 64-bit value into uvec2/ivec2 temporaries. */
void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);

/** Pack per-component uvec2/ivec2 results back into a single 64-bit vector. */
ir_dereference_variable *compact_destination(ir_factory &,
                                             const glsl_type *type,
                                             ir_variable *result[4]);

/** Replace a 64-bit ir_expression with a call to a __builtin_ helper. */
ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
                                     ir_expression *ir,
                                     ir_function_signature *callee);
};

using namespace lower_64bit;
66 
67 namespace {
68 
69 class lower_64bit_visitor : public ir_rvalue_visitor {
70 public:
lower_64bit_visitor(void * mem_ctx,exec_list * instructions,unsigned lower)71    lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
72       : progress(false), lower(lower),
73         function_list(), added_functions(&function_list, mem_ctx)
74    {
75       functions = _mesa_hash_table_create(mem_ctx,
76                                           _mesa_hash_string,
77                                           _mesa_key_string_equal);
78 
79       foreach_in_list(ir_instruction, node, instructions) {
80          ir_function *const f = node->as_function();
81 
82          if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0)
83             continue;
84 
85          add_function(f);
86       }
87    }
88 
~lower_64bit_visitor()89    ~lower_64bit_visitor()
90    {
91       _mesa_hash_table_destroy(functions, NULL);
92    }
93 
94    void handle_rvalue(ir_rvalue **rvalue);
95 
add_function(ir_function * f)96    void add_function(ir_function *f)
97    {
98       _mesa_hash_table_insert(functions, f->name, f);
99    }
100 
find_function(const char * name)101    ir_function *find_function(const char *name)
102    {
103       struct hash_entry *const entry =
104          _mesa_hash_table_search(functions, name);
105 
106       return entry != NULL ? (ir_function *) entry->data : NULL;
107    }
108 
109    bool progress;
110 
111 private:
112    unsigned lower; /** Bitfield of which operations to lower */
113 
114    /** Hashtable containing all of the known functions in the IR */
115    struct hash_table *functions;
116 
117 public:
118    exec_list function_list;
119 
120 private:
121    ir_factory added_functions;
122 
123    ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
124                         function_generator generator);
125 };
126 
127 } /* anonymous namespace */
128 
129 /**
130  * Determine if a particular type of lowering should occur
131  */
132 #define lowering(x) (this->lower & x)
133 
134 bool
lower_64bit_integer_instructions(exec_list * instructions,unsigned what_to_lower)135 lower_64bit_integer_instructions(exec_list *instructions,
136                                  unsigned what_to_lower)
137 {
138    if (instructions->is_empty())
139       return false;
140 
141    ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
142    void *const mem_ctx = ralloc_parent(first_inst);
143    lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);
144 
145    visit_list_elements(&v, instructions);
146 
147    if (v.progress && !v.function_list.is_empty()) {
148       /* Move all of the nodes from function_list to the head if the incoming
149        * instruction list.
150        */
151       exec_node *const after = &instructions->head_sentinel;
152       exec_node *const before = instructions->head_sentinel.next;
153       exec_node *const head = v.function_list.head_sentinel.next;
154       exec_node *const tail = v.function_list.tail_sentinel.prev;
155 
156       before->next = head;
157       head->prev = before;
158 
159       after->prev = tail;
160       tail->next = after;
161    }
162 
163    return v.progress;
164 }
165 
166 
167 /**
168  * Expand individual 64-bit values to uvec2 values
169  *
170  * Each operation is in one of a few forms.
171  *
172  *     vector op vector
173  *     vector op scalar
174  *     scalar op vector
175  *     scalar op scalar
176  *
177  * In the 'vector op vector' case, the two vectors must have the same size.
178  * In a way, the 'scalar op scalar' form is special case of the 'vector op
179  * vector' form.
180  *
181  * This method generates a new set of uvec2 values for each element of a
182  * single operand.  If the operand is a scalar, the uvec2 is replicated
183  * multiple times.  A value like
184  *
185  *     u64vec3(a) + u64vec3(b)
186  *
187  * becomes
188  *
189  *     u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
190  *     uvec2 tmp1 = unpackUint2x32(tmp0.x);
191  *     uvec2 tmp2 = unpackUint2x32(tmp0.y);
192  *     uvec2 tmp3 = unpackUint2x32(tmp0.z);
193  *
194  * and the returned operands array contains ir_variable pointers to
195  *
196  *     { tmp1, tmp2, tmp3, tmp1 }
197  */
198 void
expand_source(ir_factory & body,ir_rvalue * val,ir_variable ** expanded_src)199 lower_64bit::expand_source(ir_factory &body,
200                            ir_rvalue *val,
201                            ir_variable **expanded_src)
202 {
203    assert(val->type->is_integer_64());
204 
205    ir_variable *const temp = body.make_temp(val->type, "tmp");
206 
207    body.emit(assign(temp, val));
208 
209    const ir_expression_operation unpack_opcode =
210       val->type->base_type == GLSL_TYPE_UINT64
211       ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;
212 
213    const glsl_type *const type =
214       val->type->base_type == GLSL_TYPE_UINT64
215       ? glsl_type::uvec2_type : glsl_type::ivec2_type;
216 
217    unsigned i;
218    for (i = 0; i < val->type->vector_elements; i++) {
219       expanded_src[i] = body.make_temp(type, "expanded_64bit_source");
220 
221       body.emit(assign(expanded_src[i],
222                        expr(unpack_opcode, swizzle(temp, i, 1))));
223    }
224 
225    for (/* empty */; i < 4; i++)
226       expanded_src[i] = expanded_src[0];
227 }
228 
229 /**
230  * Convert a series of uvec2 results into a single 64-bit integer vector
231  */
232 ir_dereference_variable *
compact_destination(ir_factory & body,const glsl_type * type,ir_variable * result[4])233 lower_64bit::compact_destination(ir_factory &body,
234                                  const glsl_type *type,
235                                  ir_variable *result[4])
236 {
237    const ir_expression_operation pack_opcode =
238       type->base_type == GLSL_TYPE_UINT64
239       ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32;
240 
241    ir_variable *const compacted_result =
242       body.make_temp(type, "compacted_64bit_result");
243 
244    for (unsigned i = 0; i < type->vector_elements; i++) {
245       body.emit(assign(compacted_result,
246                        expr(pack_opcode, result[i]),
247                        1U << i));
248    }
249 
250    void *const mem_ctx = ralloc_parent(compacted_result);
251    return new(mem_ctx) ir_dereference_variable(compacted_result);
252 }
253 
254 ir_rvalue *
lower_op_to_function_call(ir_instruction * base_ir,ir_expression * ir,ir_function_signature * callee)255 lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
256                                        ir_expression *ir,
257                                        ir_function_signature *callee)
258 {
259    const unsigned num_operands = ir->num_operands;
260    ir_variable *src[4][4];
261    ir_variable *dst[4];
262    void *const mem_ctx = ralloc_parent(ir);
263    exec_list instructions;
264    unsigned source_components = 0;
265    const glsl_type *const result_type =
266       ir->type->base_type == GLSL_TYPE_UINT64
267       ? glsl_type::uvec2_type : glsl_type::ivec2_type;
268 
269    ir_factory body(&instructions, mem_ctx);
270 
271    for (unsigned i = 0; i < num_operands; i++) {
272       expand_source(body, ir->operands[i], src[i]);
273 
274       if (ir->operands[i]->type->vector_elements > source_components)
275          source_components = ir->operands[i]->type->vector_elements;
276    }
277 
278    for (unsigned i = 0; i < source_components; i++) {
279       dst[i] = body.make_temp(result_type, "expanded_64bit_result");
280 
281       exec_list parameters;
282 
283       for (unsigned j = 0; j < num_operands; j++)
284          parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));
285 
286       ir_dereference_variable *const return_deref =
287          new(mem_ctx) ir_dereference_variable(dst[i]);
288 
289       ir_call *const c = new(mem_ctx) ir_call(callee,
290                                               return_deref,
291                                               &parameters);
292 
293       body.emit(c);
294    }
295 
296    ir_rvalue *const rv = compact_destination(body, ir->type, dst);
297 
298    /* Move all of the nodes from instructions between base_ir and the
299     * instruction before it.
300     */
301    exec_node *const after = base_ir;
302    exec_node *const before = after->prev;
303    exec_node *const head = instructions.head_sentinel.next;
304    exec_node *const tail = instructions.tail_sentinel.prev;
305 
306    before->next = head;
307    head->prev = before;
308 
309    after->prev = tail;
310    tail->next = after;
311 
312    return rv;
313 }
314 
315 ir_rvalue *
handle_op(ir_expression * ir,const char * function_name,function_generator generator)316 lower_64bit_visitor::handle_op(ir_expression *ir,
317                                const char *function_name,
318                                function_generator generator)
319 {
320    for (unsigned i = 0; i < ir->num_operands; i++)
321       if (!ir->operands[i]->type->is_integer_64())
322          return ir;
323 
324    /* Get a handle to the correct ir_function_signature for the core
325     * operation.
326     */
327    ir_function_signature *callee = NULL;
328    ir_function *f = find_function(function_name);
329 
330    if (f != NULL) {
331       callee = (ir_function_signature *) f->signatures.get_head();
332       assert(callee != NULL && callee->ir_type == ir_type_function_signature);
333    } else {
334       f = new(base_ir) ir_function(function_name);
335       callee = generator(base_ir, NULL);
336 
337       f->add_signature(callee);
338 
339       add_function(f);
340    }
341 
342    this->progress = true;
343    return lower_op_to_function_call(this->base_ir, ir, callee);
344 }
345 
346 void
handle_rvalue(ir_rvalue ** rvalue)347 lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
348 {
349    if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
350       return;
351 
352    ir_expression *const ir = (*rvalue)->as_expression();
353    assert(ir != NULL);
354 
355    switch (ir->operation) {
356    case ir_unop_sign:
357       if (lowering(SIGN64)) {
358          *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
359       }
360       break;
361 
362    case ir_binop_div:
363       if (lowering(DIV64)) {
364          if (ir->type->base_type == GLSL_TYPE_UINT64) {
365             *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
366          } else {
367             *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
368          }
369       }
370       break;
371 
372    case ir_binop_mod:
373       if (lowering(MOD64)) {
374          if (ir->type->base_type == GLSL_TYPE_UINT64) {
375             *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
376          } else {
377             *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64);
378          }
379       }
380       break;
381 
382    case ir_binop_mul:
383       if (lowering(MUL64)) {
384          *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
385       }
386       break;
387 
388    default:
389       break;
390    }
391 }
392