1 /*
2  * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Jonathan Marek <jonathan@marek.ca>
25  */
26 
27 #include "ir2_private.h"
28 
29 static bool
is_mov(struct ir2_instr * instr)30 is_mov(struct ir2_instr *instr)
31 {
32    return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
33           instr->src_count == 1;
34 }
35 
36 static void
src_combine(struct ir2_src * src,struct ir2_src b)37 src_combine(struct ir2_src *src, struct ir2_src b)
38 {
39    src->num = b.num;
40    src->type = b.type;
41    src->swizzle = swiz_merge(b.swizzle, src->swizzle);
42    if (!src->abs) /* if we have abs we don't care about previous negate */
43       src->negate ^= b.negate;
44    src->abs |= b.abs;
45 }
46 
47 /* cp_src: replace src regs when they refer to a mov instruction
48  * example:
49  *	ALU:      MAXv    R7 = C7, C7
50  *	ALU:      MULADDv R7 = R7, R10, R0.xxxx
51  * becomes:
52  *	ALU:      MULADDv R7 = C7, R10, R0.xxxx
53  */
54 void
cp_src(struct ir2_context * ctx)55 cp_src(struct ir2_context *ctx)
56 {
57    struct ir2_instr *p;
58 
59    ir2_foreach_instr (instr, ctx) {
60       ir2_foreach_src (src, instr) {
61          /* loop to replace recursively */
62          do {
63             if (src->type != IR2_SRC_SSA)
64                break;
65 
66             p = &ctx->instr[src->num];
67             /* don't work across blocks to avoid possible issues */
68             if (p->block_idx != instr->block_idx)
69                break;
70 
71             if (!is_mov(p))
72                break;
73 
74             if (p->alu.saturate)
75                break;
76 
77             /* cant apply abs to const src, const src only for alu */
78             if (p->src[0].type == IR2_SRC_CONST &&
79                 (src->abs || instr->type != IR2_ALU))
80                break;
81 
82             src_combine(src, p->src[0]);
83          } while (1);
84       }
85    }
86 }
87 
88 /* cp_export: replace mov to export when possible
89  * in the cp_src pass we bypass any mov instructions related
90  * to the src registers, but for exports for need something different
91  * example:
92  *	ALU:      MAXv    R3.x___ = C9.x???, C9.x???
93  *	ALU:      MAXv    R3._y__ = R0.?x??, C8.?x??
94  *	ALU:      MAXv    export0 = R3.yyyx, R3.yyyx
95  * becomes:
96  *	ALU:      MAXv    export0.___w = C9.???x, C9.???x
97  *	ALU:      MAXv    export0.xyz_ = R0.xxx?, C8.xxx?
98  *
99  */
100 void
cp_export(struct ir2_context * ctx)101 cp_export(struct ir2_context *ctx)
102 {
103    struct ir2_instr *c[4], *ins[4];
104    struct ir2_src *src;
105    struct ir2_reg *reg;
106    unsigned ncomp;
107 
108    ir2_foreach_instr (instr, ctx) {
109       if (!is_export(instr)) /* TODO */
110          continue;
111 
112       if (!is_mov(instr))
113          continue;
114 
115       src = &instr->src[0];
116 
117       if (src->negate || src->abs) /* TODO handle these cases */
118          continue;
119 
120       if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
121          continue;
122 
123       reg = get_reg_src(ctx, src);
124       ncomp = dst_ncomp(instr);
125 
126       unsigned reswiz[4] = {};
127       unsigned num_instr = 0;
128 
129       /* fill array c with pointers to instrs that write each component */
130       if (src->type == IR2_SRC_SSA) {
131          struct ir2_instr *instr = &ctx->instr[src->num];
132 
133          if (instr->type != IR2_ALU)
134             continue;
135 
136          for (int i = 0; i < ncomp; i++)
137             c[i] = instr;
138 
139          ins[num_instr++] = instr;
140          reswiz[0] = src->swizzle;
141       } else {
142          bool ok = true;
143          unsigned write_mask = 0;
144 
145          ir2_foreach_instr (instr, ctx) {
146             if (instr->is_ssa || instr->reg != reg)
147                continue;
148 
149             /* set by non-ALU */
150             if (instr->type != IR2_ALU) {
151                ok = false;
152                break;
153             }
154 
155             /* component written more than once */
156             if (write_mask & instr->alu.write_mask) {
157                ok = false;
158                break;
159             }
160 
161             write_mask |= instr->alu.write_mask;
162 
163             /* src pointers for components */
164             for (int i = 0, j = 0; i < 4; i++) {
165                unsigned k = swiz_get(src->swizzle, i);
166                if (instr->alu.write_mask & 1 << k) {
167                   c[i] = instr;
168 
169                   /* reswiz = compressed src->swizzle */
170                   unsigned x = 0;
171                   for (int i = 0; i < k; i++)
172                      x += !!(instr->alu.write_mask & 1 << i);
173 
174                   assert(src->swizzle || x == j);
175                   reswiz[num_instr] |= swiz_set(x, j++);
176                }
177             }
178             ins[num_instr++] = instr;
179          }
180          if (!ok)
181             continue;
182       }
183 
184       bool redirect = true;
185 
186       /* must all be in same block */
187       for (int i = 0; i < ncomp; i++)
188          redirect &= (c[i]->block_idx == instr->block_idx);
189 
190       /* no other instr using the value */
191       ir2_foreach_instr (p, ctx) {
192          if (p == instr)
193             continue;
194          ir2_foreach_src (src, p)
195             redirect &= reg != get_reg_src(ctx, src);
196       }
197 
198       if (!redirect)
199          continue;
200 
201       /* redirect the instructions writing to the register */
202       for (int i = 0; i < num_instr; i++) {
203          struct ir2_instr *p = ins[i];
204 
205          p->alu.export = instr->alu.export;
206          p->alu.write_mask = 0;
207          p->is_ssa = true;
208          p->ssa.ncomp = 0;
209          memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
210          p->alu.saturate |= instr->alu.saturate;
211 
212          switch (p->alu.vector_opc) {
213          case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
214          case DOT2ADDv:
215          case DOT3v:
216          case DOT4v:
217          case CUBEv:
218             continue;
219          default:
220             break;
221          }
222          ir2_foreach_src (s, p)
223             swiz_merge_p(&s->swizzle, reswiz[i]);
224       }
225 
226       for (int i = 0; i < ncomp; i++) {
227          c[i]->alu.write_mask |= (1 << i);
228          c[i]->ssa.ncomp++;
229       }
230       instr->type = IR2_NONE;
231       instr->need_emit = false;
232    }
233 }
234