1 /*
2  * Copyright (C) 2019 Connor Abbott <cwabbott0@gmail.com>
3  * Copyright (C) 2019 Lyude Paul <thatslyude@gmail.com>
4  * Copyright (C) 2019 Ryan Houdek <Sonicadvance1@gmail.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  */
25 
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 #include <assert.h>
30 #include <inttypes.h>
31 #include <string.h>
32 
33 #include "bifrost.h"
34 #include "disassemble.h"
35 #include "bi_print_common.h"
36 #include "util/compiler.h"
37 #include "util/macros.h"
38 
/* Extract the bit-field [lo, high) of word: bit lo becomes the least
 * significant bit of the result and bit high is the first one left out.
 * high == 32 selects everything from lo upwards. Callers pass lo < 32 and
 * high <= 32. */
static uint64_t bits(uint32_t word, unsigned lo, unsigned high)
{
        if (high == 32)
                return word >> lo;

        /* Build the mask in an unsigned type; a signed (1 << high) overflows
         * int when high == 31. */
        return (word & ((UINT32_C(1) << high) - 1)) >> lo;
}
46 
// each of these structs represents an instruction that's dispatched in one
// cycle. Note that these instructions are packed in funny ways within the
// clause, hence the need for a separate struct.
struct bifrost_alu_inst {
        // raw encoding handed to bi_disasm_fma()
        uint32_t fma_bits;
        // raw encoding handed to bi_disasm_add()
        uint32_t add_bits;
        // raw register block; copied byte-wise into a struct bifrost_regs
        uint64_t reg_bits;
};
55 
/* Register number printed for slot 0, recovered from the packed reg0/reg1
 * fields of the register block. */
static unsigned get_reg0(struct bifrost_regs regs)
{
        /* With ctrl == 0 the low bit of reg1 supplies bit 5 of the number */
        if (regs.ctrl == 0) {
                unsigned high_bit = (regs.reg1 & 0x1) << 5;
                return regs.reg0 | high_bit;
        }

        /* Otherwise the pair is stored mirrored (63 - n) when reg0 > reg1 */
        if (regs.reg0 > regs.reg1)
                return 63 - regs.reg0;

        return regs.reg0;
}
63 
/* Register number printed for slot 1; the stored value is mirrored (63 - n)
 * whenever reg0 > reg1. */
static unsigned get_reg1(struct bifrost_regs regs)
{
        if (regs.reg0 > regs.reg1)
                return 63 - regs.reg1;

        return regs.reg1;
}
68 
// this represents the decoded version of the ctrl register field.
struct bifrost_reg_ctrl {
        // true when slot 0 is read (dump_regs prints it only in that case)
        bool read_reg0;
        // true when slot 1 is read
        bool read_reg1;
        // slot 2/3 behaviour looked up in bifrost_reg_ctrl_lut
        struct bifrost_reg_ctrl_23 slot23;
};
75 
dump_header(FILE * fp,struct bifrost_header header,bool verbose)76 static void dump_header(FILE *fp, struct bifrost_header header, bool verbose)
77 {
78         fprintf(fp, "ds(%u) ", header.dependency_slot);
79 
80         if (header.staging_barrier)
81                 fprintf(fp, "osrb ");
82 
83         fprintf(fp, "%s ", bi_flow_control_name(header.flow_control));
84 
85         if (header.suppress_inf)
86                 fprintf(fp, "inf_suppress ");
87         if (header.suppress_nan)
88                 fprintf(fp, "nan_suppress ");
89 
90         if (header.flush_to_zero == BIFROST_FTZ_DX11)
91                 fprintf(fp, "ftz_dx11 ");
92         else if (header.flush_to_zero == BIFROST_FTZ_ALWAYS)
93                 fprintf(fp, "ftz_hsa ");
94         if (header.flush_to_zero == BIFROST_FTZ_ABRUPT)
95                 fprintf(fp, "ftz_au ");
96 
97         assert(!header.zero1);
98         assert(!header.zero2);
99 
100         if (header.float_exceptions == BIFROST_EXCEPTIONS_DISABLED)
101                 fprintf(fp, "fpe_ts ");
102         else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_DIVISION)
103                 fprintf(fp, "fpe_pd ");
104         else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_SQRT)
105                 fprintf(fp, "fpe_psqr ");
106 
107         if (header.message_type)
108                 fprintf(fp, "%s ", bi_message_type_name(header.message_type));
109 
110         if (header.terminate_discarded_threads)
111                 fprintf(fp, "td ");
112 
113         if (header.next_clause_prefetch)
114                 fprintf(fp, "ncph ");
115 
116         if (header.next_message_type)
117                 fprintf(fp, "next_%s ", bi_message_type_name(header.next_message_type));
118         if (header.dependency_wait != 0) {
119                 fprintf(fp, "dwb(");
120                 bool first = true;
121                 for (unsigned i = 0; i < 8; i++) {
122                         if (header.dependency_wait & (1 << i)) {
123                                 if (!first) {
124                                         fprintf(fp, ", ");
125                                 }
126                                 fprintf(fp, "%u", i);
127                                 first = false;
128                         }
129                 }
130                 fprintf(fp, ") ");
131         }
132 
133         fprintf(fp, "\n");
134 }
135 
DecodeRegCtrl(FILE * fp,struct bifrost_regs regs,bool first)136 static struct bifrost_reg_ctrl DecodeRegCtrl(FILE *fp, struct bifrost_regs regs, bool first)
137 {
138         struct bifrost_reg_ctrl decoded = {};
139         unsigned ctrl;
140         if (regs.ctrl == 0) {
141                 ctrl = regs.reg1 >> 2;
142                 decoded.read_reg0 = !(regs.reg1 & 0x2);
143                 decoded.read_reg1 = false;
144         } else {
145                 ctrl = regs.ctrl;
146                 decoded.read_reg0 = decoded.read_reg1 = true;
147         }
148 
149         /* Modify control based on state */
150         if (first)
151                 ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1);
152         else if (regs.reg2 == regs.reg3)
153                 ctrl += 16;
154 
155         decoded.slot23 = bifrost_reg_ctrl_lut[ctrl];
156         ASSERTED struct bifrost_reg_ctrl_23 reserved = { 0 };
157         assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved)));
158 
159         return decoded;
160 }
161 
dump_regs(FILE * fp,struct bifrost_regs srcs,bool first)162 static void dump_regs(FILE *fp, struct bifrost_regs srcs, bool first)
163 {
164         struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first);
165         fprintf(fp, "    # ");
166         if (ctrl.read_reg0)
167                 fprintf(fp, "slot 0: r%u ", get_reg0(srcs));
168         if (ctrl.read_reg1)
169                 fprintf(fp, "slot 1: r%u ", get_reg1(srcs));
170 
171         const char *slot3_fma = ctrl.slot23.slot3_fma ? "FMA" : "ADD";
172 
173         if (ctrl.slot23.slot2 == BIFROST_OP_WRITE)
174                 fprintf(fp, "slot 2: r%u (write FMA) ", srcs.reg2);
175         else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO)
176                 fprintf(fp, "slot 2: r%u (write lo FMA) ", srcs.reg2);
177         else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI)
178                 fprintf(fp, "slot 2: r%u (write hi FMA) ", srcs.reg2);
179         else if (ctrl.slot23.slot2 == BIFROST_OP_READ)
180                 fprintf(fp, "slot 2: r%u (read) ", srcs.reg2);
181 
182         if (ctrl.slot23.slot3 == BIFROST_OP_WRITE)
183                 fprintf(fp, "slot 3: r%u (write %s) ", srcs.reg3, slot3_fma);
184         else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO)
185                 fprintf(fp, "slot 3: r%u (write lo %s) ", srcs.reg3, slot3_fma);
186         else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI)
187                 fprintf(fp, "slot 3: r%u (write hi %s) ", srcs.reg3, slot3_fma);
188 
189         if (srcs.fau_idx)
190                 fprintf(fp, "fau %X ", srcs.fau_idx);
191 
192         fprintf(fp, "\n");
193 }
194 
195 static void
bi_disasm_dest_mask(FILE * fp,enum bifrost_reg_op op)196 bi_disasm_dest_mask(FILE *fp, enum bifrost_reg_op op)
197 {
198         if (op == BIFROST_OP_WRITE_LO)
199                 fprintf(fp, ".h0");
200         else if (op == BIFROST_OP_WRITE_HI)
201                 fprintf(fp, ".h1");
202 }
203 
204 void
bi_disasm_dest_fma(FILE * fp,struct bifrost_regs * next_regs,bool last)205 bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool last)
206 {
207     /* If this is the last instruction, next_regs points to the first reg entry. */
208     struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
209     if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) {
210         fprintf(fp, "r%u:t0", next_regs->reg2);
211         bi_disasm_dest_mask(fp, ctrl.slot23.slot2);
212     } else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) {
213         fprintf(fp, "r%u:t0", next_regs->reg3);
214         bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
215     } else
216         fprintf(fp, "t0");
217 }
218 
219 void
bi_disasm_dest_add(FILE * fp,struct bifrost_regs * next_regs,bool last)220 bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool last)
221 {
222     /* If this is the last instruction, next_regs points to the first reg entry. */
223     struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);
224 
225     if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) {
226         fprintf(fp, "r%u:t1", next_regs->reg3);
227         bi_disasm_dest_mask(fp, ctrl.slot23.slot3);
228     } else
229         fprintf(fp, "t1");
230 }
231 
/* Print a 32-bit immediate as zero-padded hex, followed by a comment showing
 * the same bit pattern reinterpreted as a float. */
static void dump_const_imm(FILE *fp, uint32_t imm)
{
        /* Reinterpret the bits through a union */
        union {
                float f;
                uint32_t i;
        } fi;
        fi.i = imm;

        /* PRIx32 keeps the conversion matched to uint32_t on every ABI */
        fprintf(fp, "0x%08" PRIx32 " /* %f */", imm, fi.f);
}
241 
242 static void
dump_pc_imm(FILE * fp,uint64_t imm,unsigned branch_offset,enum bi_constmod mod,bool high32)243 dump_pc_imm(FILE *fp, uint64_t imm, unsigned branch_offset, enum bi_constmod mod, bool high32)
244 {
245         if (mod == BI_CONSTMOD_PC_HI && !high32) {
246                 dump_const_imm(fp, imm);
247                 return;
248         }
249 
250         /* 60-bit sign-extend */
251         uint64_t zx64 = (imm << 4);
252         int64_t sx64 = zx64;
253         sx64 >>= 4;
254 
255         /* 28-bit sign extend x 2 */
256         uint32_t imm32[2] = { (uint32_t) imm, (uint32_t) (imm >> 32) };
257         uint32_t zx32[2] = { imm32[0] << 4, imm32[1] << 4 };
258         int32_t sx32[2] = { zx32[0], zx32[1] };
259         sx32[0] >>= 4;
260         sx32[1] >>= 4;
261 
262         int64_t offs = 0;
263 
264         switch (mod) {
265         case BI_CONSTMOD_PC_LO:
266                 offs = sx64;
267                 break;
268         case BI_CONSTMOD_PC_HI:
269                 offs = sx32[1];
270                 break;
271         case BI_CONSTMOD_PC_LO_HI:
272                 offs = sx32[high32];
273                 break;
274         default:
275                 unreachable("Invalid PC modifier");
276         }
277 
278         assert((offs & 15) == 0);
279         fprintf(fp, "clause_%" PRId64, branch_offset + (offs / 16));
280 
281         if (mod == BI_CONSTMOD_PC_LO && high32)
282                 fprintf(fp, " >> 32");
283 
284         /* While technically in spec, referencing the current clause as (pc +
285          * 0) likely indicates an unintended infinite loop  */
286         if (offs == 0)
287                 fprintf(fp, " /* XXX: likely an infinite loop */");
288 }
289 
/* Convert an index to an embedded constant in FAU-RAM to the index of the
 * embedded constant. No, it's not in order. Yes, really.
 *
 * fau_value must be in 2..7; values 0 and 1 have no embedded constant and
 * anything larger is outside the table. Both cases trip an assert. */

static unsigned
const_fau_to_idx(unsigned fau_value)
{
        static const unsigned map[8] = {
                ~0u, ~0u, 4, 5, 0, 1, 2, 3
        };

        /* Check the index before it is used to read the table */
        assert(fau_value < sizeof(map) / sizeof(map[0]));
        assert(map[fau_value] < 6);
        return map[fau_value];
}
303 
dump_fau_src(FILE * fp,struct bifrost_regs srcs,unsigned branch_offset,struct bi_constants * consts,bool high32)304 static void dump_fau_src(FILE *fp, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool high32)
305 {
306         if (srcs.fau_idx & 0x80) {
307                 unsigned uniform = (srcs.fau_idx & 0x7f);
308                 fprintf(fp, "u%u.w%u", uniform, high32);
309         } else if (srcs.fau_idx >= 0x20) {
310                 unsigned idx = const_fau_to_idx(srcs.fau_idx >> 4);
311                 uint64_t imm = consts->raw[idx];
312                 imm |= (srcs.fau_idx & 0xf);
313                 if (consts->mods[idx] != BI_CONSTMOD_NONE)
314                         dump_pc_imm(fp, imm, branch_offset, consts->mods[idx], high32);
315                 else if (high32)
316                         dump_const_imm(fp, imm >> 32);
317                 else
318                         dump_const_imm(fp, imm);
319         } else {
320                 switch (srcs.fau_idx) {
321                 case 0:
322                         fprintf(fp, "#0");
323                         break;
324                 case 1:
325                         fprintf(fp, "lane_id");
326                         break;
327                 case 2:
328                         fprintf(fp, "warp_id");
329                         break;
330                 case 3:
331                         fprintf(fp, "core_id");
332                         break;
333                 case 4:
334                         fprintf(fp, "framebuffer_size");
335                         break;
336                 case 5:
337                         fprintf(fp, "atest_datum");
338                         break;
339                 case 6:
340                         fprintf(fp, "sample");
341                         break;
342                 case 8:
343                 case 9:
344                 case 10:
345                 case 11:
346                 case 12:
347                 case 13:
348                 case 14:
349                 case 15:
350                         fprintf(fp, "blend_descriptor_%u", (unsigned) srcs.fau_idx - 8);
351                         break;
352                 default:
353                         fprintf(fp, "XXX - reserved%u", (unsigned) srcs.fau_idx);
354                         break;
355                 }
356 
357                 if (high32)
358                         fprintf(fp, ".y");
359                 else
360                         fprintf(fp, ".x");
361         }
362 }
363 
364 void
dump_src(FILE * fp,unsigned src,struct bifrost_regs srcs,unsigned branch_offset,struct bi_constants * consts,bool isFMA)365 dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool isFMA)
366 {
367         switch (src) {
368         case 0:
369                 fprintf(fp, "r%u", get_reg0(srcs));
370                 break;
371         case 1:
372                 fprintf(fp, "r%u", get_reg1(srcs));
373                 break;
374         case 2:
375                 fprintf(fp, "r%u", srcs.reg2);
376                 break;
377         case 3:
378                 if (isFMA)
379                         fprintf(fp, "#0");
380                 else
381                         fprintf(fp, "t"); // i.e. the output of FMA this cycle
382                 break;
383         case 4:
384                 dump_fau_src(fp, srcs, branch_offset, consts, false);
385                 break;
386         case 5:
387                 dump_fau_src(fp, srcs, branch_offset, consts, true);
388                 break;
389         case 6:
390                 fprintf(fp, "t0");
391                 break;
392         case 7:
393                 fprintf(fp, "t1");
394                 break;
395         }
396 }
397 
/* Tables for decoding M1, or if M1 == 7, M2 respectively.
 *
 * XXX: It's not clear if the third entry of M2_table, corresponding to
 * (M1, M2) = (7, 2), should have PC_LO_HI in the EC1 slot, or it's a weird
 * hybrid mode? I would say this needs testing but no code should ever
 * actually use this mode.
 */
404 
/* Indexed by M1 (0..6) in decode_M(); each row holds the modifiers copied to
 * mod[0] and mod[1]. Row 3 is only a placeholder: decode_M() asserts that
 * M1 != 3. */
static const enum bi_constmod M1_table[7][2] = {
        { BI_CONSTMOD_NONE, BI_CONSTMOD_NONE },
        { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE },
        { BI_CONSTMOD_PC_LO, BI_CONSTMOD_PC_LO },
        { ~0, ~0 },
        { BI_CONSTMOD_PC_HI, BI_CONSTMOD_NONE },
        { BI_CONSTMOD_PC_HI, BI_CONSTMOD_PC_HI },
        { BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE },
};
414 
/* Indexed by M2 (0..3) in decode_M() when M1 == 7; each row holds the
 * modifiers copied to mod[0] and mod[1]. */
static const enum bi_constmod M2_table[4][2] = {
        { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_NONE },
        { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI },
        { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_LO_HI },
        { BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI },
};
421 
422 static void
decode_M(enum bi_constmod * mod,unsigned M1,unsigned M2,bool single)423 decode_M(enum bi_constmod *mod, unsigned M1, unsigned M2, bool single)
424 {
425         if (M1 >= 8) {
426                 mod[0] = BI_CONSTMOD_NONE;
427 
428                 if (!single)
429                         mod[1] = BI_CONSTMOD_NONE;
430 
431                 return;
432         } else if (M1 == 7) {
433                 assert(M2 < 4);
434                 memcpy(mod, M2_table[M2], sizeof(*mod) * (single ? 1 : 2));
435         } else {
436                 assert(M1 != 3);
437                 memcpy(mod, M1_table[M1], sizeof(*mod) * (single ? 1 : 2));
438         }
439 }
440 
dump_clause(FILE * fp,uint32_t * words,unsigned * size,unsigned offset,bool verbose)441 static void dump_clause(FILE *fp, uint32_t *words, unsigned *size, unsigned offset, bool verbose)
442 {
443         // State for a decoded clause
444         struct bifrost_alu_inst instrs[8] = {};
445         struct bi_constants consts = {};
446         unsigned num_instrs = 0;
447         unsigned num_consts = 0;
448         uint64_t header_bits = 0;
449 
450         unsigned i;
451         for (i = 0; ; i++, words += 4) {
452                 if (verbose) {
453                         fprintf(fp, "# ");
454                         for (int j = 0; j < 4; j++)
455                                 fprintf(fp, "%08x ", words[3 - j]); // low bit on the right
456                         fprintf(fp, "\n");
457                 }
458                 unsigned tag = bits(words[0], 0, 8);
459 
460                 // speculatively decode some things that are common between many formats, so we can share some code
461                 struct bifrost_alu_inst main_instr = {};
462                 // 20 bits
463                 main_instr.add_bits = bits(words[2], 2, 32 - 13);
464                 // 23 bits
465                 main_instr.fma_bits = bits(words[1], 11, 32) | bits(words[2], 0, 2) << (32 - 11);
466                 // 35 bits
467                 main_instr.reg_bits = ((uint64_t) bits(words[1], 0, 11)) << 24 | (uint64_t) bits(words[0], 8, 32);
468 
469                 uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t) words[1] << 28 | bits(words[2], 0, 4) << 60;
470                 uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t) words[3] << 32;
471 
472                 /* Z-bit */
473                 bool stop = tag & 0x40;
474 
475                 if (verbose) {
476                         fprintf(fp, "# tag: 0x%02x\n", tag);
477                 }
478                 if (tag & 0x80) {
479                         /* Format 5 or 10 */
480                         unsigned idx = stop ? 5 : 2;
481                         main_instr.add_bits |= ((tag >> 3) & 0x7) << 17;
482                         instrs[idx + 1] = main_instr;
483                         instrs[idx].add_bits = bits(words[3], 0, 17) | ((tag & 0x7) << 17);
484                         instrs[idx].fma_bits |= bits(words[2], 19, 32) << 10;
485                         consts.raw[0] = bits(words[3], 17, 32) << 4;
486                 } else {
487                         bool done = false;
488                         switch ((tag >> 3) & 0x7) {
489                         case 0x0:
490                                 switch (tag & 0x7) {
491                                 case 0x3:
492                                         /* Format 1 */
493                                         main_instr.add_bits |= bits(words[3], 29, 32) << 17;
494                                         instrs[1] = main_instr;
495                                         num_instrs = 2;
496                                         done = stop;
497                                         break;
498                                 case 0x4:
499                                         /* Format 3 */
500                                         instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
501                                         instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
502                                         consts.raw[0] = const0;
503                                         decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true);
504                                         num_instrs = 3;
505                                         num_consts = 1;
506                                         done = stop;
507                                         break;
508                                 case 0x1:
509                                 case 0x5:
510                                         /* Format 4 */
511                                         instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
512                                         instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;
513                                         main_instr.add_bits |= bits(words[3], 26, 29) << 17;
514                                         instrs[3] = main_instr;
515                                         if ((tag & 0x7) == 0x5) {
516                                                 num_instrs = 4;
517                                                 done = stop;
518                                         }
519                                         break;
520                                 case 0x6:
521                                         /* Format 8 */
522                                         instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
523                                         instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
524                                         consts.raw[0] = const0;
525                                         decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true);
526                                         num_instrs = 6;
527                                         num_consts = 1;
528                                         done = stop;
529                                         break;
530                                 case 0x7:
531                                         /* Format 9 */
532                                         instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
533                                         instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;
534                                         main_instr.add_bits |= bits(words[3], 26, 29) << 17;
535                                         instrs[6] = main_instr;
536                                         num_instrs = 7;
537                                         done = stop;
538                                         break;
539                                 default:
540                                         unreachable("[INSTR_INVALID_ENC] Invalid tag bits");
541                                 }
542                                 break;
543                         case 0x2:
544                         case 0x3: {
545                                 /* Format 6 or 11 */
546                                 unsigned idx = ((tag >> 3) & 0x7) == 2 ? 4 : 7;
547                                 main_instr.add_bits |= (tag & 0x7) << 17;
548                                 instrs[idx] = main_instr;
549                                 consts.raw[0] |= (bits(words[2], 19, 32) | ((uint64_t) words[3] << 13)) << 19;
550                                 num_consts = 1;
551                                 num_instrs = idx + 1;
552                                 done = stop;
553                                 break;
554                         }
555                         case 0x4: {
556                                 /* Format 2 */
557                                 unsigned idx = stop ? 4 : 1;
558                                 main_instr.add_bits |= (tag & 0x7) << 17;
559                                 instrs[idx] = main_instr;
560                                 instrs[idx + 1].fma_bits |= bits(words[3], 22, 32);
561                                 instrs[idx + 1].reg_bits = bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19));
562                                 break;
563                         }
564                         case 0x1:
565                                 /* Format 0 - followed by constants */
566                                 num_instrs = 1;
567                                 done = stop;
568                                 FALLTHROUGH;
569                         case 0x5:
570                                 /* Format 0 - followed by instructions */
571                                 header_bits = bits(words[2], 19, 32) | ((uint64_t) words[3] << (32 - 19));
572                                 main_instr.add_bits |= (tag & 0x7) << 17;
573                                 instrs[0] = main_instr;
574                                 break;
575                         case 0x6:
576                         case 0x7: {
577                                 /* Format 12 */
578                                 unsigned pos = tag & 0xf;
579 
580                                 struct {
581                                         unsigned const_idx;
582                                         unsigned nr_tuples;
583                                 } pos_table[0x10] = {
584                                         { 0, 1 },
585                                         { 0, 2 },
586                                         { 0, 4 },
587                                         { 1, 3 },
588                                         { 1, 5 },
589                                         { 2, 4 },
590                                         { 0, 7 },
591                                         { 1, 6 },
592                                         { 3, 5 },
593                                         { 1, 8 },
594                                         { 2, 7 },
595                                         { 3, 6 },
596                                         { 3, 8 },
597                                         { 4, 7 },
598                                         { 5, 6 },
599                                         { ~0, ~0 }
600                                 };
601 
602                                 ASSERTED bool valid_count = pos_table[pos].nr_tuples == num_instrs;
603                                 assert(valid_count && "INSTR_INVALID_ENC");
604 
605                                 unsigned const_idx = pos_table[pos].const_idx;
606 
607                                 if (num_consts < const_idx + 2)
608                                         num_consts = const_idx + 2;
609 
610                                 consts.raw[const_idx] = const0;
611                                 consts.raw[const_idx + 1] = const1;
612 
613                                 /* Calculate M values from A, B and 4-bit
614                                  * unsigned arithmetic. Mathematically it
615                                  * should be (A - B) % 16 but we use this
616                                  * alternate form to avoid sign issues */
617 
618                                 unsigned A1 = bits(words[2], 0, 4);
619                                 unsigned B1 = bits(words[3], 28, 32);
620                                 unsigned A2 = bits(words[1], 0, 4);
621                                 unsigned B2 = bits(words[2], 28, 32);
622 
623                                 unsigned M1 = (16 + A1 - B1) & 0xF;
624                                 unsigned M2 = (16 + A2 - B2) & 0xF;
625 
626                                 decode_M(&consts.mods[const_idx], M1, M2, false);
627 
628                                 done = stop;
629                                 break;
630                         }
631                         default:
632                                 break;
633                         }
634 
635                         if (done)
636                                 break;
637                 }
638         }
639 
640         *size = i + 1;
641 
642         if (verbose) {
643                 fprintf(fp, "# header: %012" PRIx64 "\n", header_bits);
644         }
645 
646         struct bifrost_header header;
647         memcpy((char *) &header, (char *) &header_bits, sizeof(struct bifrost_header));
648         dump_header(fp, header, verbose);
649 
650         fprintf(fp, "{\n");
651         for (i = 0; i < num_instrs; i++) {
652                 struct bifrost_regs regs, next_regs;
653                 if (i + 1 == num_instrs) {
654                         memcpy((char *) &next_regs, (char *) &instrs[0].reg_bits,
655                                sizeof(next_regs));
656                 } else {
657                         memcpy((char *) &next_regs, (char *) &instrs[i + 1].reg_bits,
658                                sizeof(next_regs));
659                 }
660 
661                 memcpy((char *) &regs, (char *) &instrs[i].reg_bits, sizeof(regs));
662 
663                 if (verbose) {
664                         fprintf(fp, "    # regs: %016" PRIx64 "\n", instrs[i].reg_bits);
665                         dump_regs(fp, regs, i == 0);
666                 }
667 
668                 bi_disasm_fma(fp, instrs[i].fma_bits, &regs, &next_regs,
669                                 header.staging_register, offset, &consts,
670                                 i + 1 == num_instrs);
671 
672                 bi_disasm_add(fp, instrs[i].add_bits, &regs, &next_regs,
673                                 header.staging_register, offset, &consts,
674                                 i + 1 == num_instrs);
675         }
676         fprintf(fp, "}\n");
677 
678         if (verbose) {
679                 for (unsigned i = 0; i < num_consts; i++) {
680                         fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i, consts.raw[i] & 0xffffffff);
681                         fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i + 1, consts.raw[i] >> 32);
682                 }
683         }
684 
685         fprintf(fp, "\n");
686         return;
687 }
688 
/* Disassemble a Bifrost shader binary to fp, clause by clause.
 *
 * code     start of the binary; it is read as 32-bit words, so it must be
 *          suitably aligned for uint32_t access
 * size     length of the binary in bytes; a trailing partial quadword is
 *          ignored
 * verbose  forwarded to dump_clause() to enable the raw '#' annotations
 *
 * Each clause is introduced by a "clause_N:" label, N being its offset in
 * quadwords from the start of the binary. */
void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose)
{
        uint32_t *words = (uint32_t *) code;

        /* Whole quadwords (16 bytes) still available. Counting them instead
         * of comparing against an end pointer guarantees termination even if
         * a clause does not end exactly at the end of the buffer. */
        size_t quads_left = size / 16;

        // used for displaying branch targets
        unsigned offset = 0;

        while (quads_left > 0) {
                /* Shaders have zero bytes at the end for padding; stop
                 * disassembling when we hit them. */
                if (*words == 0)
                        break;

                fprintf(fp, "clause_%u:\n", offset);

                unsigned clause_quads = 0;
                dump_clause(fp, words, &clause_quads, offset, verbose);

                /* A clause that claims more quadwords than remain means the
                 * binary is truncated or malformed; do not step past the
                 * end of the buffer. */
                if (clause_quads > quads_left)
                        break;

                words += (size_t) clause_quads * 4;
                quads_left -= clause_quads;
                offset += clause_quads;
        }
}
710 
711