1 /*
2  * Copyright (c) 2019 Connor Abbott <cwabbott0@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include "nir.h"
26 #include "nir_builder.h"
27 #include "lima_ir.h"
28 
/* This pass clones certain input intrinsics, creating a copy for each user.
 * Inputs are relatively cheap, since in both PP and GP one input can be
 * loaded "for free" in each instruction bundle. In GP especially, if there is
 * a load instruction with multiple uses in different basic blocks, we need to
 * split it in NIR so that we don't generate a register write and reads for
 * it, which is almost certainly more expensive than splitting. Hence this
 * pass is more aggressive than nir_opt_move, which just moves the intrinsic
 * down but won't split it.
 *
 * load_const instructions are duplicated in the same way, so that every
 * instruction using a constant gets its own copy of it.
 */
38 
39 static nir_ssa_def *
clone_intrinsic(nir_builder * b,nir_intrinsic_instr * intrin)40 clone_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
41 {
42    nir_intrinsic_instr *new_intrin =
43       nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr));
44 
45    assert(new_intrin->dest.is_ssa);
46 
47    unsigned num_srcs = nir_intrinsic_infos[new_intrin->intrinsic].num_srcs;
48    for (unsigned i = 0; i < num_srcs; i++) {
49       assert(new_intrin->src[i].is_ssa);
50    }
51 
52    nir_builder_instr_insert(b, &new_intrin->instr);
53 
54    return &new_intrin->dest.ssa;
55 }
56 
57 static bool
replace_intrinsic(nir_builder * b,nir_intrinsic_instr * intrin)58 replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
59 {
60    if (!intrin->dest.is_ssa)
61       return false;
62 
63    if (intrin->intrinsic != nir_intrinsic_load_input &&
64        intrin->intrinsic != nir_intrinsic_load_uniform)
65       return false;
66 
67    if (!intrin->src[0].is_ssa)
68       return false;
69 
70    if (intrin->src[0].ssa->parent_instr->type == nir_instr_type_load_const)
71       return false;
72 
73    struct hash_table *visited_instrs = _mesa_pointer_hash_table_create(NULL);
74 
75    nir_foreach_use_safe(src, &intrin->dest.ssa) {
76       struct hash_entry *entry =
77          _mesa_hash_table_search(visited_instrs, src->parent_instr);
78       if (entry && (src->parent_instr->type != nir_instr_type_phi)) {
79          nir_ssa_def *def = entry->data;
80          nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(def));
81          continue;
82       }
83       b->cursor = nir_before_src(src, false);
84       nir_ssa_def *new = clone_intrinsic(b, intrin);
85       nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(new));
86       _mesa_hash_table_insert(visited_instrs, src->parent_instr, new);
87    }
88    nir_foreach_if_use_safe(src, &intrin->dest.ssa) {
89       b->cursor = nir_before_src(src, true);
90       nir_if_rewrite_condition(src->parent_if,
91                                nir_src_for_ssa(clone_intrinsic(b, intrin)));
92    }
93 
94    nir_instr_remove(&intrin->instr);
95    _mesa_hash_table_destroy(visited_instrs, NULL);
96    return true;
97 }
98 
99 static void
replace_load_const(nir_builder * b,nir_load_const_instr * load_const)100 replace_load_const(nir_builder *b, nir_load_const_instr *load_const)
101 {
102    struct hash_table *visited_instrs = _mesa_pointer_hash_table_create(NULL);
103 
104    nir_foreach_use_safe(src, &load_const->def) {
105       struct hash_entry *entry =
106          _mesa_hash_table_search(visited_instrs, src->parent_instr);
107       if (entry && (src->parent_instr->type != nir_instr_type_phi)) {
108          nir_ssa_def *def = entry->data;
109          nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(def));
110          continue;
111       }
112       b->cursor = nir_before_src(src, false);
113       nir_ssa_def *new = nir_build_imm(b, load_const->def.num_components,
114                                        load_const->def.bit_size,
115                                        load_const->value);
116       nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(new));
117       _mesa_hash_table_insert(visited_instrs, src->parent_instr, new);
118    }
119 
120    nir_instr_remove(&load_const->instr);
121    _mesa_hash_table_destroy(visited_instrs, NULL);
122 }
123 
124 bool
lima_nir_split_loads(nir_shader * shader)125 lima_nir_split_loads(nir_shader *shader)
126 {
127    bool progress = false;
128 
129    nir_foreach_function(function, shader) {
130       if (function->impl) {
131          nir_builder b;
132          nir_builder_init(&b, function->impl);
133 
134          nir_foreach_block_reverse(block, function->impl) {
135             nir_foreach_instr_reverse_safe(instr, block) {
136                if (instr->type == nir_instr_type_load_const) {
137                   replace_load_const(&b, nir_instr_as_load_const(instr));
138                   progress = true;
139                } else if (instr->type == nir_instr_type_intrinsic) {
140                   progress |= replace_intrinsic(&b, nir_instr_as_intrinsic(instr));
141                }
142             }
143          }
144       }
145    }
146 
147    return progress;
148 }
149 
150