1 /*
2  * Copyright (C) 2021 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "ir3.h"
25 
/* Lower several macro-instructions needed for shader subgroup support that
 * must be turned into if statements. We do this after RA and post-RA
 * scheduling to give the scheduler a chance to rearrange them, because RA
 * may need to insert OPC_META_READ_FIRST to handle splitting live ranges, and
 * also because some (e.g. BALLOT and READ_FIRST) must produce a shared
 * register that cannot be spilled to a normal register until after the if,
 * which makes implementing spilling more complicated if they are already
 * lowered.
 */
35 
36 static void
replace_pred(struct ir3_block * block,struct ir3_block * old_pred,struct ir3_block * new_pred)37 replace_pred(struct ir3_block *block, struct ir3_block *old_pred,
38              struct ir3_block *new_pred)
39 {
40    for (unsigned i = 0; i < block->predecessors_count; i++) {
41       if (block->predecessors[i] == old_pred) {
42          block->predecessors[i] = new_pred;
43          return;
44       }
45    }
46 }
47 
48 static void
replace_physical_pred(struct ir3_block * block,struct ir3_block * old_pred,struct ir3_block * new_pred)49 replace_physical_pred(struct ir3_block *block, struct ir3_block *old_pred,
50                       struct ir3_block *new_pred)
51 {
52    for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
53       if (block->physical_predecessors[i] == old_pred) {
54          block->physical_predecessors[i] = new_pred;
55          return;
56       }
57    }
58 }
59 
60 static void
mov_immed(struct ir3_register * dst,struct ir3_block * block,unsigned immed)61 mov_immed(struct ir3_register *dst, struct ir3_block *block, unsigned immed)
62 {
63    struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
64    struct ir3_register *mov_dst = ir3_dst_create(mov, dst->num, dst->flags);
65    mov_dst->wrmask = dst->wrmask;
66    struct ir3_register *src = ir3_src_create(
67       mov, INVALID_REG, (dst->flags & IR3_REG_HALF) | IR3_REG_IMMED);
68    src->uim_val = immed;
69    mov->cat1.dst_type = (dst->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
70    mov->cat1.src_type = mov->cat1.dst_type;
71    mov->repeat = util_last_bit(mov_dst->wrmask) - 1;
72 }
73 
74 static struct ir3_block *
split_block(struct ir3 * ir,struct ir3_block * before_block,struct ir3_instruction * instr,struct ir3_block ** then)75 split_block(struct ir3 *ir, struct ir3_block *before_block,
76             struct ir3_instruction *instr, struct ir3_block **then)
77 {
78    struct ir3_block *then_block = ir3_block_create(ir);
79    struct ir3_block *after_block = ir3_block_create(ir);
80    list_add(&then_block->node, &before_block->node);
81    list_add(&after_block->node, &then_block->node);
82 
83    for (unsigned i = 0; i < ARRAY_SIZE(before_block->successors); i++) {
84       after_block->successors[i] = before_block->successors[i];
85       if (after_block->successors[i])
86          replace_pred(after_block->successors[i], before_block, after_block);
87    }
88 
89    for (unsigned i = 0; i < ARRAY_SIZE(before_block->physical_successors);
90         i++) {
91       after_block->physical_successors[i] =
92          before_block->physical_successors[i];
93       if (after_block->physical_successors[i]) {
94          replace_physical_pred(after_block->physical_successors[i],
95                                before_block, after_block);
96       }
97    }
98 
99    before_block->successors[0] = then_block;
100    before_block->successors[1] = after_block;
101    before_block->physical_successors[0] = then_block;
102    before_block->physical_successors[1] = after_block;
103    ir3_block_add_predecessor(then_block, before_block);
104    ir3_block_add_predecessor(after_block, before_block);
105    ir3_block_add_physical_predecessor(then_block, before_block);
106    ir3_block_add_physical_predecessor(after_block, before_block);
107 
108    then_block->successors[0] = after_block;
109    then_block->physical_successors[0] = after_block;
110    ir3_block_add_predecessor(after_block, then_block);
111    ir3_block_add_physical_predecessor(after_block, then_block);
112 
113    foreach_instr_from_safe (rem_instr, &instr->node,
114                             &before_block->instr_list) {
115       list_del(&rem_instr->node);
116       list_addtail(&rem_instr->node, &after_block->instr_list);
117       rem_instr->block = after_block;
118    }
119 
120    after_block->brtype = before_block->brtype;
121    after_block->condition = before_block->condition;
122 
123    *then = then_block;
124    return after_block;
125 }
126 
127 static bool
lower_block(struct ir3 * ir,struct ir3_block ** block)128 lower_block(struct ir3 *ir, struct ir3_block **block)
129 {
130    bool progress = false;
131 
132    foreach_instr_safe (instr, &(*block)->instr_list) {
133       switch (instr->opc) {
134       case OPC_BALLOT_MACRO:
135       case OPC_ANY_MACRO:
136       case OPC_ALL_MACRO:
137       case OPC_ELECT_MACRO:
138       case OPC_READ_COND_MACRO:
139       case OPC_READ_FIRST_MACRO:
140       case OPC_SWZ_SHARED_MACRO:
141          break;
142       default:
143          continue;
144       }
145 
146       struct ir3_block *before_block = *block;
147       struct ir3_block *then_block;
148       struct ir3_block *after_block =
149          split_block(ir, before_block, instr, &then_block);
150 
151       /* For ballot, the destination must be initialized to 0 before we do
152        * the movmsk because the condition may be 0 and then the movmsk will
153        * be skipped. Because it's a shared register we have to wrap the
154        * initialization in a getone block.
155        */
156       if (instr->opc == OPC_BALLOT_MACRO) {
157          before_block->brtype = IR3_BRANCH_GETONE;
158          before_block->condition = NULL;
159          mov_immed(instr->dsts[0], then_block, 0);
160          before_block = after_block;
161          after_block = split_block(ir, before_block, instr, &then_block);
162       }
163 
164       switch (instr->opc) {
165       case OPC_BALLOT_MACRO:
166       case OPC_READ_COND_MACRO:
167       case OPC_ANY_MACRO:
168       case OPC_ALL_MACRO:
169          before_block->condition = instr->srcs[0]->def->instr;
170          break;
171       default:
172          before_block->condition = NULL;
173          break;
174       }
175 
176       switch (instr->opc) {
177       case OPC_BALLOT_MACRO:
178       case OPC_READ_COND_MACRO:
179          before_block->brtype = IR3_BRANCH_COND;
180          break;
181       case OPC_ANY_MACRO:
182          before_block->brtype = IR3_BRANCH_ANY;
183          break;
184       case OPC_ALL_MACRO:
185          before_block->brtype = IR3_BRANCH_ALL;
186          break;
187       case OPC_ELECT_MACRO:
188       case OPC_READ_FIRST_MACRO:
189       case OPC_SWZ_SHARED_MACRO:
190          before_block->brtype = IR3_BRANCH_GETONE;
191          break;
192       default:
193          unreachable("bad opcode");
194       }
195 
196       switch (instr->opc) {
197       case OPC_ALL_MACRO:
198       case OPC_ANY_MACRO:
199       case OPC_ELECT_MACRO:
200          mov_immed(instr->dsts[0], then_block, 1);
201          mov_immed(instr->dsts[0], before_block, 0);
202          break;
203 
204       case OPC_BALLOT_MACRO: {
205          unsigned comp_count = util_last_bit(instr->dsts[0]->wrmask);
206          struct ir3_instruction *movmsk =
207             ir3_instr_create(then_block, OPC_MOVMSK, 1, 0);
208          ir3_dst_create(movmsk, instr->dsts[0]->num, instr->dsts[0]->flags);
209          movmsk->repeat = comp_count - 1;
210          break;
211       }
212 
213       case OPC_READ_COND_MACRO:
214       case OPC_READ_FIRST_MACRO: {
215          struct ir3_instruction *mov =
216             ir3_instr_create(then_block, OPC_MOV, 1, 1);
217          unsigned src = instr->opc == OPC_READ_COND_MACRO ? 1 : 0;
218          ir3_dst_create(mov, instr->dsts[0]->num, instr->dsts[0]->flags);
219          struct ir3_register *new_src = ir3_src_create(mov, 0, 0);
220          *new_src = *instr->srcs[src];
221          mov->cat1.dst_type = TYPE_U32;
222          mov->cat1.src_type =
223             (new_src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
224          break;
225       }
226 
227       case OPC_SWZ_SHARED_MACRO: {
228          struct ir3_instruction *swz =
229             ir3_instr_create(then_block, OPC_SWZ, 2, 2);
230          ir3_dst_create(swz, instr->dsts[0]->num, instr->dsts[0]->flags);
231          ir3_dst_create(swz, instr->dsts[1]->num, instr->dsts[1]->flags);
232          ir3_src_create(swz, instr->srcs[0]->num, instr->srcs[0]->flags);
233          ir3_src_create(swz, instr->srcs[1]->num, instr->srcs[1]->flags);
234          swz->cat1.dst_type = swz->cat1.src_type = TYPE_U32;
235          swz->repeat = 1;
236          break;
237       }
238 
239       default:
240          unreachable("bad opcode");
241       }
242 
243       *block = after_block;
244       list_delinit(&instr->node);
245       progress = true;
246    }
247 
248    return progress;
249 }
250 
251 bool
ir3_lower_subgroups(struct ir3 * ir)252 ir3_lower_subgroups(struct ir3 *ir)
253 {
254    bool progress = false;
255 
256    foreach_block (block, &ir->block_list)
257       progress |= lower_block(ir, &block);
258 
259    return progress;
260 }
261