1 /*
2  * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Jonathan Marek <jonathan@marek.ca>
25  */
26 
27 #include <assert.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 #include "ir2/instr-a2xx.h"
34 #include "fd2_program.h"
35 #include "ir2.h"
36 
/* Kind of operand a struct ir2_src refers to; it selects how the `num`
 * field of the source is interpreted. The values are stored in a 2-bit
 * bit-field, so there can be at most four of them.
 */
enum ir2_src_type {
   IR2_SRC_SSA,
   IR2_SRC_REG,
   IR2_SRC_INPUT,
   IR2_SRC_CONST,
};
43 
44 struct ir2_src {
45    /* num can mean different things
46     *   ssa: index of instruction
47     *   reg: index in ctx->reg array
48     *   input: index in ctx->input array
49     *   const: constant index (C0, C1, etc)
50     */
51    uint16_t num;
52    uint8_t swizzle;
53    enum ir2_src_type type : 2;
54    uint8_t abs : 1;
55    uint8_t negate : 1;
56    uint8_t : 4;
57 };
58 
/* Per-component state of a register. */
struct ir2_reg_component {
   uint8_t c : 3;     /* assigned x/y/z/w (7=dont write, for fetch instr) */
   bool alloc : 1;    /* is it currently allocated */
   uint8_t ref_count; /* for ra */
};
64 
65 struct ir2_reg {
66    uint8_t idx; /* assigned hardware register */
67    uint8_t ncomp;
68 
69    uint8_t loop_depth;
70    bool initialized;
71    /* block_idx to free on (-1 = free on ref_count==0) */
72    int block_idx_free;
73    struct ir2_reg_component comp[4];
74 };
75 
76 struct ir2_instr {
77    unsigned idx;
78 
79    unsigned block_idx;
80 
81    enum {
82       IR2_NONE,
83       IR2_FETCH,
84       IR2_ALU,
85       IR2_CF,
86    } type : 2;
87 
88    /* instruction needs to be emitted (for scheduling) */
89    bool need_emit : 1;
90 
91    /* predicate value - (usually) same for entire block */
92    uint8_t pred : 2;
93 
94    /* src */
95    uint8_t src_count;
96    struct ir2_src src[4];
97 
98    /* dst */
99    bool is_ssa;
100    union {
101       struct ir2_reg ssa;
102       struct ir2_reg *reg;
103    };
104 
105    /* type-specific */
106    union {
107       struct {
108          instr_fetch_opc_t opc : 5;
109          union {
110             struct {
111                uint8_t const_idx;
112                uint8_t const_idx_sel;
113             } vtx;
114             struct {
115                bool is_cube : 1;
116                bool is_rect : 1;
117                uint8_t samp_id;
118             } tex;
119          };
120       } fetch;
121       struct {
122          /* store possible opcs, then we can choose vector/scalar instr */
123          instr_scalar_opc_t scalar_opc : 6;
124          instr_vector_opc_t vector_opc : 5;
125          /* same as nir */
126          uint8_t write_mask : 4;
127          bool saturate : 1;
128 
129          /* export idx (-1 no export) */
130          int8_t export;
131 
132          /* for scalarized 2 src instruction */
133          uint8_t src1_swizzle;
134       } alu;
135       struct {
136          /* jmp dst block_idx */
137          uint8_t block_idx;
138       } cf;
139    };
140 };
141 
/* One scheduled slot: up to two instructions (instr, instr_s) together with
 * a 256-bit register-state mask, the same size as ir2_context::reg_state.
 */
struct ir2_sched_instr {
   uint32_t reg_state[8];
   struct ir2_instr *instr, *instr_s;
};
146 
147 struct ir2_context {
148    struct fd2_shader_stateobj *so;
149 
150    unsigned block_idx, pred_idx;
151    uint8_t pred;
152    bool block_has_jump[64];
153 
154    unsigned loop_last_block[64];
155    unsigned loop_depth;
156 
157    nir_shader *nir;
158 
159    /* ssa index of position output */
160    struct ir2_src position;
161 
162    /* to translate SSA ids to instruction ids */
163    int16_t ssa_map[1024];
164 
165    struct ir2_shader_info *info;
166    struct ir2_frag_linkage *f;
167 
168    int prev_export;
169 
170    /* RA state */
171    struct ir2_reg *live_regs[64];
172    uint32_t reg_state[256 / 32]; /* 64*4 bits */
173 
174    /* inputs */
175    struct ir2_reg input[16 + 1]; /* 16 + param */
176 
177    /* non-ssa regs */
178    struct ir2_reg reg[64];
179    unsigned reg_count;
180 
181    struct ir2_instr instr[0x300];
182    unsigned instr_count;
183 
184    struct ir2_sched_instr instr_sched[0x180];
185    unsigned instr_sched_count;
186 };
187 
188 void assemble(struct ir2_context *ctx, bool binning);
189 
190 void ir2_nir_compile(struct ir2_context *ctx, bool binning);
191 bool ir2_nir_lower_scalar(nir_shader *shader);
192 
193 void ra_count_refs(struct ir2_context *ctx);
194 void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
195             bool export, uint8_t export_writemask);
196 void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
197 void ra_block_free(struct ir2_context *ctx, unsigned block);
198 
199 void cp_src(struct ir2_context *ctx);
200 void cp_export(struct ir2_context *ctx);
201 
/* utils */

/* Predefined swizzles. Each channel i occupies bits [2*i+1 : 2*i] and holds
 * (component - i) & 3, i.e. the offset relative to the channel's own index
 * (the same encoding swiz_set()/swiz_get() use). A value of 0 is therefore
 * the identity swizzle.
 */
enum {
   IR2_SWIZZLE_Y = (1 << 0),
   IR2_SWIZZLE_Z = (2 << 0),
   IR2_SWIZZLE_W = (3 << 0),

   IR2_SWIZZLE_ZW = (2 << 0) | (2 << 2),

   IR2_SWIZZLE_YXW = (1 << 0) | (3 << 2) | (1 << 4),

   IR2_SWIZZLE_XXXX = (0 << 0) | (3 << 2) | (2 << 4) | (1 << 6),
   IR2_SWIZZLE_YYYY = (1 << 0) | (0 << 2) | (3 << 4) | (2 << 6),
   IR2_SWIZZLE_ZZZZ = (2 << 0) | (1 << 2) | (0 << 4) | (3 << 6),
   IR2_SWIZZLE_WWWW = (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6),
   IR2_SWIZZLE_WYWW = (3 << 0) | (0 << 2) | (1 << 4) | (0 << 6),
   IR2_SWIZZLE_XYXY = (0 << 0) | (0 << 2) | (2 << 4) | (2 << 6),
   IR2_SWIZZLE_ZZXY = (2 << 0) | (1 << 2) | (2 << 4) | (2 << 6),
   IR2_SWIZZLE_YXZZ = (1 << 0) | (3 << 2) | (0 << 4) | (3 << 6),
};
221 
/* Report a compile failure: print the printf-style message to stdout, then
 * trip assert(0). `ctx` is accepted but not used. Expands to a GNU statement
 * expression. When NDEBUG is defined assert() is a no-op, so execution
 * continues after the message has been printed.
 */
#define compile_error(ctx, args...)                                            \
   ({                                                                          \
      printf(args);                                                            \
      assert(0);                                                               \
   })
227 
228 static inline struct ir2_src
ir2_src(uint16_t num,uint8_t swizzle,enum ir2_src_type type)229 ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)
230 {
231    return (struct ir2_src){.num = num, .swizzle = swizzle, .type = type};
232 }
233 
/* declared here because ir2_assemble uses it; defined elsewhere */
struct ir2_src ir2_zero(struct ir2_context *ctx);
236 
/* Iterate `it` over the first (ctx)->instr_count entries of (ctx)->instr,
 * skipping entries whose type is IR2_NONE. The skipping happens inside the
 * loop condition (GNU statement expression), so `continue` in the body is
 * safe.
 */
#define ir2_foreach_instr(it, ctx)                                             \
   for (struct ir2_instr *it = (ctx)->instr; ({                                \
           while (it != &(ctx)->instr[(ctx)->instr_count] &&                   \
                  it->type == IR2_NONE)                                        \
              it++;                                                            \
           it != &(ctx)->instr[(ctx)->instr_count];                            \
        });                                                                    \
        it++)
245 
/* Iterate `it` over every non-NULL entry of (ctx)->live_regs (64 slots).
 *
 * __ptr is the real cursor; `it` is reloaded from *__ptr each time the
 * condition runs. The step therefore has to advance __ptr: stepping `it`
 * instead would revisit the same slot forever unless the body cleared it.
 * The body may still set the current slot to NULL.
 */
#define ir2_foreach_live_reg(it, ctx)                                          \
   for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({                     \
           while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL)            \
              __ptr++;                                                         \
           __ptr != &(ctx)->live_regs[64] ? (it = *__ptr) : NULL;              \
        });                                                                    \
        __ptr++)
253 
/* Iterate `it` over the first avail_count entries of `avail`, an array of
 * struct ir2_instr pointers; both names are taken from the scope in which
 * the macro is used. The bound is tested before the slot is read, so the
 * element one past the last valid entry is never loaded.
 */
#define ir2_foreach_avail(it)                                                  \
   for (struct ir2_instr **__instrp = avail, *it;                              \
        __instrp != &avail[avail_count] && (it = *__instrp, 1); __instrp++)
257 
/* Iterate `it` over the first src_count entries of instr->src. `instr` is
 * any pointer expression; it is parenthesized at every use so that arguments
 * such as `&x` expand correctly. It is evaluated more than once.
 */
#define ir2_foreach_src(it, instr)                                             \
   for (struct ir2_src *it = (instr)->src;                                     \
        it != &(instr)->src[(instr)->src_count]; it++)
261 
262 /* mask for register allocation
263  * 64 registers with 4 components each = 256 bits
264  */
265 /* typedef struct {
266         uint64_t data[4];
267 } regmask_t; */
268 
/* Test bit `num` of a bit mask stored as an array of 32-bit words.
 * The caller must make sure num / 32 is a valid index into `mask`.
 */
static inline bool
mask_isset(uint32_t *mask, unsigned num)
{
   /* unsigned 1: shifting a signed 1 by 31 would overflow int */
   return (mask[num / 32] & (UINT32_C(1) << (num % 32))) != 0;
}
274 
/* Set bit `num` of a bit mask stored as an array of 32-bit words.
 * The caller must make sure num / 32 is a valid index into `mask`.
 */
static inline void
mask_set(uint32_t *mask, unsigned num)
{
   /* unsigned 1: shifting a signed 1 by 31 would overflow int */
   mask[num / 32] |= UINT32_C(1) << (num % 32);
}
280 
/* Clear bit `num` of a bit mask stored as an array of 32-bit words.
 * The caller must make sure num / 32 is a valid index into `mask`.
 */
static inline void
mask_unset(uint32_t *mask, unsigned num)
{
   /* unsigned 1: shifting a signed 1 by 31 would overflow int */
   mask[num / 32] &= ~(UINT32_C(1) << (num % 32));
}
286 
/* Return the 4 bits belonging to register `num`: the mask holds one nibble
 * per register, eight registers per 32-bit word.
 */
static inline unsigned
mask_reg(uint32_t *mask, unsigned num)
{
   unsigned word = num / 8;
   unsigned shift = (num % 8) * 4;

   return (mask[word] >> shift) & 0xf;
}
292 
293 static inline bool
is_export(struct ir2_instr * instr)294 is_export(struct ir2_instr *instr)
295 {
296    return instr->type == IR2_ALU && instr->alu.export >= 0;
297 }
298 
299 static inline instr_alloc_type_t
export_buf(unsigned num)300 export_buf(unsigned num)
301 {
302    return num < 32 ? SQ_PARAMETER_PIXEL : num >= 62 ? SQ_POSITION : SQ_MEMORY;
303 }
304 
/* component c for channel i
 *
 * Returns the swizzle bits for channel i alone: the 2-bit value (c - i) & 3
 * placed at bit position 2*i. Results for different channels can be OR-ed
 * together.
 */
static inline unsigned
swiz_set(unsigned c, unsigned i)
{
   unsigned rel = (c - i) & 3;

   return rel << (i * 2);
}
311 
/* get swizzle in channel i
 *
 * Returns the component (0..3) that channel i selects: the stored 2-bit
 * value is relative to i, so i is added back modulo 4.
 */
static inline unsigned
swiz_get(unsigned swiz, unsigned i)
{
   unsigned rel = swiz >> (i * 2);

   return (rel + i) & 3;
}
318 
/* Compose two swizzles: channel i of the result selects the component that
 * swiz0 holds in the channel picked by swiz1 for i, i.e.
 * result[i] = swiz0[swiz1[i]].
 */
static inline unsigned
swiz_merge(unsigned swiz0, unsigned swiz1)
{
   unsigned merged = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      unsigned inner = swiz_get(swiz1, chan);

      merged |= swiz_set(swiz_get(swiz0, inner), chan);
   }
   return merged;
}
327 
/* In-place variant of swiz_merge(): replaces *swiz0 with the composition of
 * *swiz0 and swiz1. The result uses 2 bits for each of 4 channels, so it
 * fits in the uint8_t.
 */
static inline void
swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
{
   *swiz0 = swiz_merge(*swiz0, swiz1);
}
336 
337 static inline struct ir2_reg *
get_reg(struct ir2_instr * instr)338 get_reg(struct ir2_instr *instr)
339 {
340    return instr->is_ssa ? &instr->ssa : instr->reg;
341 }
342 
343 static inline struct ir2_reg *
get_reg_src(struct ir2_context * ctx,struct ir2_src * src)344 get_reg_src(struct ir2_context *ctx, struct ir2_src *src)
345 {
346    switch (src->type) {
347    case IR2_SRC_INPUT:
348       return &ctx->input[src->num];
349    case IR2_SRC_SSA:
350       return &ctx->instr[src->num].ssa;
351    case IR2_SRC_REG:
352       return &ctx->reg[src->num];
353    default:
354       return NULL;
355    }
356 }
357 
358 /* gets a ncomp value for the dst */
359 static inline unsigned
dst_ncomp(struct ir2_instr * instr)360 dst_ncomp(struct ir2_instr *instr)
361 {
362    if (instr->is_ssa)
363       return instr->ssa.ncomp;
364 
365    if (instr->type == IR2_FETCH)
366       return instr->reg->ncomp;
367 
368    assert(instr->type == IR2_ALU);
369 
370    unsigned ncomp = 0;
371    for (int i = 0; i < instr->reg->ncomp; i++)
372       ncomp += !!(instr->alu.write_mask & 1 << i);
373    return ncomp;
374 }
375 
376 /* gets a ncomp value for the src registers */
377 static inline unsigned
src_ncomp(struct ir2_instr * instr)378 src_ncomp(struct ir2_instr *instr)
379 {
380    if (instr->type == IR2_FETCH) {
381       switch (instr->fetch.opc) {
382       case VTX_FETCH:
383          return 1;
384       case TEX_FETCH:
385          return instr->fetch.tex.is_cube ? 3 : 2;
386       case TEX_SET_TEX_LOD:
387          return 1;
388       default:
389          assert(0);
390       }
391    }
392 
393    switch (instr->alu.scalar_opc) {
394    case PRED_SETEs ... KILLONEs:
395       return 1;
396    default:
397       break;
398    }
399 
400    switch (instr->alu.vector_opc) {
401    case DOT2ADDv:
402       return 2;
403    case DOT3v:
404       return 3;
405    case DOT4v:
406    case CUBEv:
407    case PRED_SETE_PUSHv:
408       return 4;
409    default:
410       return dst_ncomp(instr);
411    }
412 }
413