/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */
24 
25 #include "util/ralloc.h"
26 #include "compiler/nir/nir.h"
27 #include "pipe/p_state.h"
28 
29 
30 #include "gpir.h"
31 #include "lima_context.h"
32 
gpir_create_reg(gpir_compiler * comp)33 gpir_reg *gpir_create_reg(gpir_compiler *comp)
34 {
35    gpir_reg *reg = ralloc(comp, gpir_reg);
36    reg->index = comp->cur_reg++;
37    list_addtail(&reg->list, &comp->reg_list);
38    return reg;
39 }
40 
reg_for_nir_reg(gpir_compiler * comp,nir_register * nir_reg)41 static gpir_reg *reg_for_nir_reg(gpir_compiler *comp, nir_register *nir_reg)
42 {
43    unsigned index = nir_reg->index;
44    gpir_reg *reg = comp->reg_for_reg[index];
45    if (reg)
46       return reg;
47    reg = gpir_create_reg(comp);
48    comp->reg_for_reg[index] = reg;
49    return reg;
50 }
51 
/* Tell whether an SSA value is consumed somewhere other than the block that
 * defines it: either by an instruction that lives in a different block, or by
 * an if-condition whose cf node is not immediately preceded by the defining
 * block.
 */
static bool ssa_needs_register(nir_ssa_def *ssa)
{
   nir_block *def_block = ssa->parent_instr->block;

   nir_foreach_use(use, ssa) {
      if (use->parent_instr->block != def_block)
         return true;
   }

   nir_foreach_if_use(use, ssa) {
      if (nir_cf_node_prev(&use->parent_if->cf_node) != &def_block->cf_node)
         return true;
   }

   return false;
}

/* Record `node` as the producer of the SSA value `ssa` and name the node
 * "ssa<index>". When the value is also needed outside the current block, a
 * store_reg node writing a freshly created register is appended to the block
 * and the register is remembered in comp->reg_for_ssa so that other blocks
 * can load the value back.
 */
static void register_node_ssa(gpir_block *block, gpir_node *node, nir_ssa_def *ssa)
{
   gpir_compiler *comp = block->comp;

   comp->node_for_ssa[ssa->index] = node;
   snprintf(node->name, sizeof(node->name), "ssa%d", ssa->index);

   /* Values only used inside this block never need to go through a register. */
   if (!ssa_needs_register(ssa))
      return;

   gpir_store_node *spill = gpir_node_create(block, gpir_op_store_reg);
   spill->child = node;
   spill->reg = gpir_create_reg(comp);
   gpir_node_add_dep(&spill->node, node, GPIR_DEP_INPUT);
   list_addtail(&spill->node.list, &block->node_list);

   comp->reg_for_ssa[ssa->index] = spill->reg;
}
87 
/* Record `node` as the latest producer of a NIR register destination, name
 * the node "reg<index>", and append a store_reg node that writes the node's
 * value to the gpir register backing that NIR register.
 */
static void register_node_reg(gpir_block *block, gpir_node *node, nir_reg_dest *nir_reg)
{
   gpir_compiler *comp = block->comp;
   nir_register *target = nir_reg->reg;

   comp->node_for_reg[target->index] = node;

   gpir_store_node *writeback = gpir_node_create(block, gpir_op_store_reg);

   snprintf(node->name, sizeof(node->name), "reg%d", target->index);

   writeback->child = node;
   writeback->reg = reg_for_nir_reg(comp, target);
   gpir_node_add_dep(&writeback->node, node, GPIR_DEP_INPUT);

   list_addtail(&writeback->node.list, &block->node_list);
}
101 
102 /* Register the given gpir_node as providing the given NIR destination, so
103  * that gpir_node_find() will return it. Also insert any stores necessary if
104  * the destination will be used after the end of this basic block. The node
105  * must already be inserted.
106  */
register_node(gpir_block * block,gpir_node * node,nir_dest * dest)107 static void register_node(gpir_block *block, gpir_node *node, nir_dest *dest)
108 {
109    if (dest->is_ssa)
110       register_node_ssa(block, node, &dest->ssa);
111    else
112       register_node_reg(block, node, &dest->reg);
113 }
114 
gpir_node_find(gpir_block * block,nir_src * src,int channel)115 static gpir_node *gpir_node_find(gpir_block *block, nir_src *src,
116                                  int channel)
117 {
118    gpir_reg *reg = NULL;
119    gpir_node *pred = NULL;
120    if (src->is_ssa) {
121       if (src->ssa->num_components > 1) {
122          for (int i = 0; i < GPIR_VECTOR_SSA_NUM; i++) {
123             if (block->comp->vector_ssa[i].ssa == src->ssa->index) {
124                return block->comp->vector_ssa[i].nodes[channel];
125             }
126          }
127       } else {
128          gpir_node *pred = block->comp->node_for_ssa[src->ssa->index];
129          if (pred->block == block)
130             return pred;
131          reg = block->comp->reg_for_ssa[src->ssa->index];
132       }
133    } else {
134       pred = block->comp->node_for_reg[src->reg.reg->index];
135       if (pred && pred->block == block)
136          return pred;
137       reg = reg_for_nir_reg(block->comp, src->reg.reg);
138    }
139 
140    assert(reg);
141    pred = gpir_node_create(block, gpir_op_load_reg);
142    gpir_load_node *load = gpir_node_to_load(pred);
143    load->reg = reg;
144    list_addtail(&pred->list, &block->node_list);
145 
146    return pred;
147 }
148 
149 static int nir_to_gpir_opcodes[nir_num_opcodes] = {
150    /* not supported */
151    [0 ... nir_last_opcode] = -1,
152 
153    [nir_op_fmul] = gpir_op_mul,
154    [nir_op_fadd] = gpir_op_add,
155    [nir_op_fneg] = gpir_op_neg,
156    [nir_op_fmin] = gpir_op_min,
157    [nir_op_fmax] = gpir_op_max,
158    [nir_op_frcp] = gpir_op_rcp,
159    [nir_op_frsq] = gpir_op_rsqrt,
160    [nir_op_fexp2] = gpir_op_exp2,
161    [nir_op_flog2] = gpir_op_log2,
162    [nir_op_slt] = gpir_op_lt,
163    [nir_op_sge] = gpir_op_ge,
164    [nir_op_fcsel] = gpir_op_select,
165    [nir_op_ffloor] = gpir_op_floor,
166    [nir_op_fsign] = gpir_op_sign,
167    [nir_op_seq] = gpir_op_eq,
168    [nir_op_sne] = gpir_op_ne,
169    [nir_op_fabs] = gpir_op_abs,
170 };
171 
/* Translate one NIR ALU instruction into a gpir ALU node appended to `block`.
 *
 * Only the first swizzle component of each source is used. Returns false when
 * the NIR opcode has no gpir equivalent or the node cannot be created.
 */
static bool gpir_emit_alu(gpir_block *block, nir_instr *ni)
{
   nir_alu_instr *alu = nir_instr_as_alu(ni);

   /* A gpir_op_mov is of no use before the final scheduler, and the scheduler
    * does not currently expect to be handed one. So a NIR mov is handled by
    * registering its source node as the provider of its destination, which
    * also emits whatever register loads/stores are required for cases such as
    * "r0 = mov ssa_0" or "ssa_0 = mov r0".
    */
   if (alu->op == nir_op_mov) {
      nir_alu_src *mov_src = &alu->src[0];
      gpir_node *source = gpir_node_find(block, &mov_src->src,
                                         mov_src->swizzle[0]);
      register_node(block, source, &alu->dest.dest);
      return true;
   }

   int mapped = nir_to_gpir_opcodes[alu->op];
   if (mapped < 0) {
      gpir_error("unsupported nir_op: %s\n", nir_op_infos[alu->op].name);
      return false;
   }

   gpir_alu_node *node = gpir_node_create(block, mapped);
   if (unlikely(!node))
      return false;

   unsigned input_count = nir_op_infos[alu->op].num_inputs;
   assert(input_count <= ARRAY_SIZE(node->children));
   node->num_child = input_count;

   for (unsigned i = 0; i < input_count; i++) {
      nir_alu_src *input = &alu->src[i];

      node->children_negate[i] = input->negate;

      gpir_node *operand = gpir_node_find(block, &input->src, input->swizzle[0]);
      node->children[i] = operand;

      gpir_node_add_dep(&node->node, operand, GPIR_DEP_INPUT);
   }

   list_addtail(&node->node.list, &block->node_list);
   register_node(block, &node->node, &alu->dest.dest);

   return true;
}
219 
gpir_create_load(gpir_block * block,nir_dest * dest,int op,int index,int component)220 static gpir_node *gpir_create_load(gpir_block *block, nir_dest *dest,
221                                    int op, int index, int component)
222 {
223    gpir_load_node *load = gpir_node_create(block, op);
224    if (unlikely(!load))
225       return NULL;
226 
227    load->index = index;
228    load->component = component;
229    list_addtail(&load->node.list, &block->node_list);
230    register_node(block, &load->node, dest);
231    return &load->node;
232 }
233 
/* Emit one load_uniform node per component of a multi-component SSA
 * destination and record them in comp->vector_ssa[index], so that
 * gpir_node_find() can later pick the node for an individual channel.
 *
 * The uniform slot read is comp->constant_base + index. Each node is named
 * "ssa<N>.<channel>". Returns false as soon as a load cannot be created.
 */
static bool gpir_create_vector_load(gpir_block *block, nir_dest *dest, int index)
{
   assert(dest->is_ssa);
   assert(index < GPIR_VECTOR_SSA_NUM);

   gpir_compiler *comp = block->comp;

   comp->vector_ssa[index].ssa = dest->ssa.index;

   for (int chan = 0; chan < dest->ssa.num_components; chan++) {
      gpir_node *ld = gpir_create_load(block, dest, gpir_op_load_uniform,
                                       comp->constant_base + index, chan);
      if (ld == NULL)
         return false;

      comp->vector_ssa[index].nodes[chan] = ld;
      snprintf(ld->name, sizeof(ld->name), "ssa%d.%c", dest->ssa.index, "xyzw"[chan]);
   }

   return true;
}
253 
gpir_emit_intrinsic(gpir_block * block,nir_instr * ni)254 static bool gpir_emit_intrinsic(gpir_block *block, nir_instr *ni)
255 {
256    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
257 
258    switch (instr->intrinsic) {
259    case nir_intrinsic_load_input:
260       return gpir_create_load(block, &instr->dest,
261                               gpir_op_load_attribute,
262                               nir_intrinsic_base(instr),
263                               nir_intrinsic_component(instr)) != NULL;
264    case nir_intrinsic_load_uniform:
265    {
266       int offset = nir_intrinsic_base(instr);
267       offset += (int)nir_src_as_float(instr->src[0]);
268 
269       return gpir_create_load(block, &instr->dest,
270                               gpir_op_load_uniform,
271                               offset / 4, offset % 4) != NULL;
272    }
273    case nir_intrinsic_load_viewport_scale:
274       return gpir_create_vector_load(block, &instr->dest, GPIR_VECTOR_SSA_VIEWPORT_SCALE);
275    case nir_intrinsic_load_viewport_offset:
276       return gpir_create_vector_load(block, &instr->dest, GPIR_VECTOR_SSA_VIEWPORT_OFFSET);
277    case nir_intrinsic_store_output:
278    {
279       gpir_store_node *store = gpir_node_create(block, gpir_op_store_varying);
280       if (unlikely(!store))
281          return false;
282       gpir_node *child = gpir_node_find(block, instr->src, 0);
283       store->child = child;
284       store->index = nir_intrinsic_base(instr);
285       store->component = nir_intrinsic_component(instr);
286 
287       gpir_node_add_dep(&store->node, child, GPIR_DEP_INPUT);
288       list_addtail(&store->node.list, &block->node_list);
289 
290       return true;
291    }
292    default:
293       gpir_error("unsupported nir_intrinsic_instr %s\n",
294                  nir_intrinsic_infos[instr->intrinsic].name);
295       return false;
296    }
297 }
298 
/* Translate a NIR load_const into a gpir const node appended to `block` and
 * registered as the producer of the constant's SSA definition.
 *
 * Only 32-bit, single-component constants are expected (asserted). Returns
 * false when the node cannot be created.
 */
static bool gpir_emit_load_const(gpir_block *block, nir_instr *ni)
{
   nir_load_const_instr *lc = nir_instr_as_load_const(ni);

   gpir_const_node *konst = gpir_node_create(block, gpir_op_const);
   if (unlikely(konst == NULL))
      return false;

   assert(lc->def.bit_size == 32);
   assert(lc->def.num_components == 1);

   /* Copy the raw 32-bit pattern of the constant. */
   konst->value.i = lc->value[0].i32;

   list_addtail(&konst->node.list, &block->node_list);
   register_node_ssa(block, &konst->node, &lc->def);

   return true;
}
315 
/* Handler for nir_ssa_undef instructions: they are not supported, so this
 * always reports an error and fails. Both arguments are ignored.
 */
static bool gpir_emit_ssa_undef(gpir_block *block, nir_instr *ni)
{
   (void)block;
   (void)ni;

   gpir_error("nir_ssa_undef_instr is not supported\n");
   return false;
}
321 
/* Handler for NIR texture instructions: they are not supported, so this
 * always reports an error and fails. Both arguments are ignored.
 */
static bool gpir_emit_tex(gpir_block *block, nir_instr *ni)
{
   (void)block;
   (void)ni;

   gpir_error("texture operations are not supported\n");
   return false;
}
327 
/* Handler for NIR jump instructions. Nothing is emitted here because jumps
 * are emitted at the end of the basic block; the handler simply succeeds.
 */
static bool gpir_emit_jump(gpir_block *block, nir_instr *ni)
{
   (void)block;
   (void)ni;

   return true;
}
333 
334 static bool (*gpir_emit_instr[nir_instr_type_phi])(gpir_block *, nir_instr *) = {
335    [nir_instr_type_alu]        = gpir_emit_alu,
336    [nir_instr_type_intrinsic]  = gpir_emit_intrinsic,
337    [nir_instr_type_load_const] = gpir_emit_load_const,
338    [nir_instr_type_ssa_undef]  = gpir_emit_ssa_undef,
339    [nir_instr_type_tex]        = gpir_emit_tex,
340    [nir_instr_type_jump]       = gpir_emit_jump,
341 };
342 
/* Translate a NIR function implementation into gpir blocks and nodes.
 *
 * A first pass creates one gpir_block per NIR block (stored in comp->blocks,
 * indexed by the NIR block index, and linked into comp->block_list). A second
 * pass emits the instructions of every block through gpir_emit_instr[] and
 * then wires up the block successors and terminating branches.
 *
 * Returns false on allocation failure, on an instruction type that has no
 * handler, or when a handler fails.
 */
static bool gpir_emit_function(gpir_compiler *comp, nir_function_impl *impl)
{
   nir_index_blocks(impl);
   comp->blocks = ralloc_array(comp, gpir_block *, impl->num_blocks);
   if (!comp->blocks)
      return false;

   nir_foreach_block(block_nir, impl) {
      gpir_block *block = ralloc(comp, gpir_block);
      if (!block)
         return false;

      list_inithead(&block->node_list);
      list_inithead(&block->instr_list);

      list_addtail(&block->list, &comp->block_list);
      block->comp = comp;
      comp->blocks[block_nir->index] = block;
   }

   nir_foreach_block(block_nir, impl) {
      gpir_block *block = comp->blocks[block_nir->index];
      nir_foreach_instr(instr, block_nir) {
         assert(instr->type < nir_instr_type_phi);

         /* The handler table is sparsely initialized: reject instruction
          * types without a handler instead of calling through NULL.
          */
         bool (*emit)(gpir_block *, nir_instr *) = gpir_emit_instr[instr->type];
         if (!emit) {
            gpir_error("unsupported nir instruction type %d\n", (int)instr->type);
            return false;
         }

         if (!emit(block, instr))
            return false;
      }

      /* A NIR successor that is the function's end block has no gpir
       * counterpart and is represented by NULL.
       */
      if (block_nir->successors[0] == impl->end_block)
         block->successors[0] = NULL;
      else
         block->successors[0] = comp->blocks[block_nir->successors[0]->index];
      block->successors[1] = NULL;

      if (block_nir->successors[1] != NULL) {
         /* Two successors: the block is followed by an if. Emit a conditional
          * branch to successors[1] taken on the negated if-condition;
          * successors[0] is reached by falling through (asserted below).
          */
         nir_if *nif = nir_cf_node_as_if(nir_cf_node_next(&block_nir->cf_node));
         gpir_alu_node *cond = gpir_node_create(block, gpir_op_not);
         if (unlikely(!cond))
            return false;
         cond->children[0] = gpir_node_find(block, &nif->condition, 0);

         gpir_node_add_dep(&cond->node, cond->children[0], GPIR_DEP_INPUT);
         list_addtail(&cond->node.list, &block->node_list);

         gpir_branch_node *branch = gpir_node_create(block, gpir_op_branch_cond);
         if (unlikely(!branch))
            return false;
         list_addtail(&branch->node.list, &block->node_list);

         branch->dest = comp->blocks[block_nir->successors[1]->index];
         block->successors[1] = branch->dest;

         branch->cond = &cond->node;
         gpir_node_add_dep(&branch->node, &cond->node, GPIR_DEP_INPUT);

         assert(block_nir->successors[0]->index == block_nir->index + 1);
      } else if (block_nir->successors[0]->index != block_nir->index + 1) {
         /* Single successor that is not the next block in index order: an
          * explicit unconditional branch is needed.
          */
         gpir_branch_node *branch = gpir_node_create(block, gpir_op_branch_uncond);
         if (unlikely(!branch))
            return false;
         list_addtail(&branch->node.list, &block->node_list);

         branch->dest = comp->blocks[block_nir->successors[0]->index];
      }
   }

   return true;
}
403 
gpir_compiler_create(void * prog,unsigned num_reg,unsigned num_ssa)404 static gpir_compiler *gpir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
405 {
406    gpir_compiler *comp = rzalloc(prog, gpir_compiler);
407 
408    list_inithead(&comp->block_list);
409    list_inithead(&comp->reg_list);
410 
411    for (int i = 0; i < GPIR_VECTOR_SSA_NUM; i++)
412       comp->vector_ssa[i].ssa = -1;
413 
414    comp->node_for_ssa = rzalloc_array(comp, gpir_node *, num_ssa);
415    comp->node_for_reg = rzalloc_array(comp, gpir_node *, num_reg);
416    comp->reg_for_ssa = rzalloc_array(comp, gpir_reg *, num_ssa);
417    comp->reg_for_reg = rzalloc_array(comp, gpir_reg *, num_reg);
418    comp->prog = prog;
419    return comp;
420 }
421 
gpir_glsl_type_size(enum glsl_base_type type)422 static int gpir_glsl_type_size(enum glsl_base_type type)
423 {
424    /* only support GLSL_TYPE_FLOAT */
425    assert(type == GLSL_TYPE_FLOAT);
426    return 4;
427 }
428 
/* Report shader statistics (instruction, loop, spill and fill counts taken
 * from `comp`) for shader-db.
 *
 * The line is always sent through pipe_debug_message() and additionally
 * printed to stderr when LIMA_DEBUG_SHADERDB is set in lima_debug. This is
 * best-effort diagnostics: if the message cannot be formatted, nothing is
 * reported.
 */
static void gpir_print_shader_db(struct nir_shader *nir, gpir_compiler *comp,
                                 struct pipe_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb = NULL;
   int ret = asprintf(&shaderdb,
                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                      gl_shader_stage_name(info->stage),
                      comp->num_instr,
                      comp->num_loops,
                      comp->num_spills,
                      comp->num_fills);

   /* On failure the contents of shaderdb are undefined, so it must neither
    * be printed nor freed. An assert alone would vanish in NDEBUG builds.
    */
   if (ret < 0)
      return;

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}
449 
gpir_compile_nir(struct lima_vs_compiled_shader * prog,struct nir_shader * nir,struct pipe_debug_callback * debug)450 bool gpir_compile_nir(struct lima_vs_compiled_shader *prog, struct nir_shader *nir,
451                       struct pipe_debug_callback *debug)
452 {
453    nir_function_impl *func = nir_shader_get_entrypoint(nir);
454    gpir_compiler *comp = gpir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
455    if (!comp)
456       return false;
457 
458    comp->constant_base = nir->num_uniforms;
459    prog->state.uniform_size = nir->num_uniforms * 16;
460    prog->state.gl_pos_idx = 0;
461    prog->state.point_size_idx = -1;
462 
463    if (!gpir_emit_function(comp, func))
464       goto err_out0;
465 
466    gpir_node_print_prog_seq(comp);
467    gpir_node_print_prog_dep(comp);
468 
469    /* increase for viewport uniforms */
470    comp->constant_base += GPIR_VECTOR_SSA_NUM;
471 
472    if (!gpir_optimize(comp))
473       goto err_out0;
474 
475    if (!gpir_pre_rsched_lower_prog(comp))
476       goto err_out0;
477 
478    if (!gpir_reduce_reg_pressure_schedule_prog(comp))
479       goto err_out0;
480 
481    if (!gpir_regalloc_prog(comp))
482       goto err_out0;
483 
484    if (!gpir_schedule_prog(comp))
485       goto err_out0;
486 
487    if (!gpir_codegen_prog(comp))
488       goto err_out0;
489 
490    /* initialize to support accumulating below */
491    nir_foreach_shader_out_variable(var, nir) {
492       struct lima_varying_info *v = prog->state.varying + var->data.driver_location;
493       v->components = 0;
494    }
495 
496    nir_foreach_shader_out_variable(var, nir) {
497       bool varying = true;
498       switch (var->data.location) {
499       case VARYING_SLOT_POS:
500          prog->state.gl_pos_idx = var->data.driver_location;
501          varying = false;
502          break;
503       case VARYING_SLOT_PSIZ:
504          prog->state.point_size_idx = var->data.driver_location;
505          varying = false;
506          break;
507       }
508 
509       struct lima_varying_info *v = prog->state.varying + var->data.driver_location;
510       if (!v->components) {
511          v->component_size = gpir_glsl_type_size(glsl_get_base_type(var->type));
512          prog->state.num_outputs++;
513          if (varying)
514             prog->state.num_varyings++;
515       }
516 
517       v->components += glsl_get_components(var->type);
518    }
519 
520    gpir_print_shader_db(nir, comp, debug);
521 
522    ralloc_free(comp);
523    return true;
524 
525 err_out0:
526    ralloc_free(comp);
527    return false;
528 }
529 
530