1 /*
2  * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <err.h>
26 #include <fcntl.h>
27 #include <getopt.h>
28 #include <stdarg.h>
29 #include <stdbool.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 
36 #include "util/os_file.h"
37 
38 #include "freedreno_pm4.h"
39 
40 #include "afuc.h"
41 #include "util.h"
42 #include "emu.h"
43 
/* GPU generation (gpu_id / 100, so 5 for a5xx and 6 for a6xx).  Set once in
 * main() and consulted while decoding, since some encodings differ between
 * generations.
 */
static int gpuver;

/* non-verbose mode should output something suitable to feed back into
 * assembler.. verbose mode has additional output useful for debugging
 * (like unexpected bits that are set)
 */
static bool verbose = false;

/* emulator mode: instead of dumping the whole image, step the emulator and
 * disassemble each instruction at the emulated pc (see disasm()).
 */
static bool emulator = false;
54 
/*
 * Append a "; <name>" annotation when regbase resolves to a known GPU
 * register.  Values below 0x100 are never annotated.  The name returned by
 * afuc_gpu_reg_name() is released here after printing.
 */
static void
print_gpu_reg(uint32_t regbase)
{
   char *regname;

   if (regbase >= 0x100) {
      regname = afuc_gpu_reg_name(regbase);
      if (regname != NULL) {
         printf("\t; %s", regname);
         free(regname);
      }
   }
}
67 
/* Thin wrappers around afuc_printc():
 *   printerr() - prints in the AFUC_ERR class; used in this file for raw
 *                encodings and unexpected bits
 *   printlbl() - prints in the AFUC_LBL class; used in this file for label,
 *                function and packet names
 * (presumably the class selects the output color -- confirm in util.h)
 */
#define printerr(fmt, ...) afuc_printc(AFUC_ERR, fmt, ##__VA_ARGS__)
#define printlbl(fmt, ...) afuc_printc(AFUC_LBL, fmt, ##__VA_ARGS__)
70 
71 void
print_src(unsigned reg)72 print_src(unsigned reg)
73 {
74    if (reg == REG_REM)
75       printf("$rem"); /* remainding dwords in packet */
76    else if (reg == REG_MEMDATA)
77       printf("$memdata");
78    else if (reg == REG_REGDATA)
79       printf("$regdata");
80    else if (reg == REG_DATA)
81       printf("$data");
82    else
83       printf("$%02x", reg);
84 }
85 
86 void
print_dst(unsigned reg)87 print_dst(unsigned reg)
88 {
89    if (reg == REG_REM)
90       printf("$rem"); /* remainding dwords in packet */
91    else if (reg == REG_ADDR)
92       printf("$addr");
93    else if (reg == REG_USRADDR)
94       printf("$usraddr");
95    else if (reg == REG_DATA)
96       printf("$data");
97    else
98       printf("$%02x", reg);
99 }
100 
101 static void
print_alu_name(afuc_opc opc,uint32_t instr)102 print_alu_name(afuc_opc opc, uint32_t instr)
103 {
104    if (opc == OPC_ADD) {
105       printf("add ");
106    } else if (opc == OPC_ADDHI) {
107       printf("addhi ");
108    } else if (opc == OPC_SUB) {
109       printf("sub ");
110    } else if (opc == OPC_SUBHI) {
111       printf("subhi ");
112    } else if (opc == OPC_AND) {
113       printf("and ");
114    } else if (opc == OPC_OR) {
115       printf("or ");
116    } else if (opc == OPC_XOR) {
117       printf("xor ");
118    } else if (opc == OPC_NOT) {
119       printf("not ");
120    } else if (opc == OPC_SHL) {
121       printf("shl ");
122    } else if (opc == OPC_USHR) {
123       printf("ushr ");
124    } else if (opc == OPC_ISHR) {
125       printf("ishr ");
126    } else if (opc == OPC_ROT) {
127       printf("rot ");
128    } else if (opc == OPC_MUL8) {
129       printf("mul8 ");
130    } else if (opc == OPC_MIN) {
131       printf("min ");
132    } else if (opc == OPC_MAX) {
133       printf("max ");
134    } else if (opc == OPC_CMP) {
135       printf("cmp ");
136    } else if (opc == OPC_MSB) {
137       printf("msb ");
138    } else {
139       printerr("[%08x]", instr);
140       printf("  ; alu%02x ", opc);
141    }
142 }
143 
/*
 * Look up the name for a PM4 packet id via afuc_pm_id_name().  Callers in
 * this file treat a NULL result as "no known name".
 */
static const char *
getpm4(uint32_t id)
{
   const char *pkt_name = afuc_pm_id_name(id);

   return pkt_name;
}
149 
/*
 * Jump table bookkeeping: one entry per distinct instruction offset that
 * the packet jump table points at, together with every packet id that
 * dispatches to that offset.
 */
static struct {
   uint32_t offset;           /* instruction offset the packets jump to */
   uint32_t num_jump_labels;  /* number of valid entries in jump_labels[] */
   uint32_t jump_labels[256]; /* packet ids that share this offset */
} jump_labels[1024];
int num_jump_labels;          /* number of valid entries in the table above */

/*
 * Record that packet id `n` dispatches to instruction `offset`.
 *
 * Ids above 128 are ignored.  Entries sharing the same offset are merged
 * into a single table slot, so one offset may carry several packet ids.
 *
 * Capacity is checked *before* each store, so a full table trips the assert
 * instead of being written past, and all 256 per-offset slots are usable.
 */
static void
add_jump_table_entry(uint32_t n, uint32_t offset)
{
   int i;

   if (n > 128) /* can't possibly be a PM4 PKT3.. */
      return;

   /* Re-use the slot for this offset if there already is one: */
   for (i = 0; i < num_jump_labels; i++)
      if (jump_labels[i].offset == offset)
         break;

   if (i == num_jump_labels) {
      /* No slot yet, append a fresh one: */
      assert((size_t)i < sizeof(jump_labels) / sizeof(jump_labels[0]));
      jump_labels[i].offset = offset;
      jump_labels[i].num_jump_labels = 0;
      num_jump_labels = i + 1;
   }

   assert(jump_labels[i].num_jump_labels <
          sizeof(jump_labels[i].jump_labels) /
             sizeof(jump_labels[i].jump_labels[0]));
   jump_labels[i].jump_labels[jump_labels[i].num_jump_labels++] = n;
}
177 
178 static int
get_jump_table_entry(uint32_t offset)179 get_jump_table_entry(uint32_t offset)
180 {
181    int i;
182 
183    for (i = 0; i < num_jump_labels; i++)
184       if (jump_labels[i].offset == offset)
185          return i;
186 
187    return -1;
188 }
189 
/* Instruction offsets that have a label; the array index identifies the
 * label.
 */
static uint32_t label_offsets[0x512];
static int num_label_offsets;

/*
 * Look up the label index for an instruction offset.
 *
 * offset: instruction offset to look up
 * create: when true, an unknown offset is appended to the table
 *
 * Returns the index of the label, or -1 when the offset is unknown and
 * either `create` is false or the table is already full.  A full table is
 * reported on stderr rather than written past.
 */
static int
label_idx(uint32_t offset, bool create)
{
   const int capacity = (int)(sizeof(label_offsets) / sizeof(label_offsets[0]));
   int i;

   for (i = 0; i < num_label_offsets; i++)
      if (offset == label_offsets[i])
         return i;
   if (!create)
      return -1;

   /* Never store past the end of the table: */
   if (i >= capacity) {
      fprintf(stderr, "too many labels, no label for offset %04x\n",
              (unsigned)offset);
      return -1;
   }

   label_offsets[i] = offset;
   num_label_offsets = i + 1;
   return i;
}
206 
207 static const char *
label_name(uint32_t offset,bool allow_jt)208 label_name(uint32_t offset, bool allow_jt)
209 {
210    static char name[12];
211    int lidx;
212 
213    if (allow_jt) {
214       lidx = get_jump_table_entry(offset);
215       if (lidx >= 0) {
216          int j;
217          for (j = 0; j < jump_labels[lidx].num_jump_labels; j++) {
218             uint32_t jump_label = jump_labels[lidx].jump_labels[j];
219             const char *str = getpm4(jump_label);
220             if (str)
221                return str;
222          }
223          // if we don't find anything w/ known name, maybe we should
224          // return UNKN%d to at least make it clear that this is some
225          // sort of jump-table entry?
226       }
227    }
228 
229    lidx = label_idx(offset, false);
230    if (lidx < 0)
231       return NULL;
232    sprintf(name, "l%03d", lidx);
233    return name;
234 }
235 
/* Instruction offsets that are the target of a call; the array index
 * identifies the function.
 */
static uint32_t fxn_offsets[0x512];
static int num_fxn_offsets;

/*
 * Look up the function index for an instruction offset.
 *
 * offset: instruction offset to look up
 * create: when true, an unknown offset is appended to the table
 *
 * Returns the index of the function, or -1 when the offset is unknown and
 * either `create` is false or the table is already full.  A full table is
 * reported on stderr rather than written past.
 */
static int
fxn_idx(uint32_t offset, bool create)
{
   const int capacity = (int)(sizeof(fxn_offsets) / sizeof(fxn_offsets[0]));
   int i;

   for (i = 0; i < num_fxn_offsets; i++)
      if (offset == fxn_offsets[i])
         return i;
   if (!create)
      return -1;

   /* Never store past the end of the table: */
   if (i >= capacity) {
      fprintf(stderr, "too many functions, no name for offset %04x\n",
              (unsigned)offset);
      return -1;
   }

   fxn_offsets[i] = offset;
   num_fxn_offsets = i + 1;
   return i;
}
252 
253 static const char *
fxn_name(uint32_t offset)254 fxn_name(uint32_t offset)
255 {
256    static char name[14];
257    int fidx = fxn_idx(offset, false);
258    if (fidx < 0)
259       return NULL;
260    sprintf(name, "fxn%02d", fidx);
261    return name;
262 }
263 
/*
 * Print a control register reference: "@<name>" when
 * afuc_control_reg_name() knows the id, otherwise the id as a three digit
 * hex number.  The returned name is released here after printing.
 */
void
print_control_reg(uint32_t id)
{
   char *regname = afuc_control_reg_name(id);

   if (regname == NULL) {
      printf("0x%03x", id);
      return;
   }

   printf("@%s", regname);
   free(regname);
}
275 
/*
 * Print a pipe register reference: "|<name>" when afuc_pipe_reg_name()
 * knows the id, otherwise the id as a three digit hex number.  The
 * returned name is released here after printing.
 */
void
print_pipe_reg(uint32_t id)
{
   char *regname = afuc_pipe_reg_name(id);

   if (regname == NULL) {
      printf("0x%03x", id);
      return;
   }

   printf("|%s", regname);
   free(regname);
}
287 
/*
 * Disassemble and print the single instruction at instrs[pc].
 *
 * Output, in order: any jump-table (packet) labels, the function label and
 * the branch label registered for pc, then (in verbose mode) the pc and raw
 * encoding, then the decoded instruction, then a newline.  Encodings with
 * unexpected bits set are prefixed with the raw dword in brackets.
 *
 * Reads the jump/label/function tables, which are filled in by
 * setup_packet_table() and setup_labels(), plus the globals gpuver and
 * verbose.
 */
static void
disasm_instr(uint32_t *instrs, unsigned pc)
{
   int jump_label_idx;
   afuc_instr *instr = (void *)&instrs[pc];
   const char *fname, *lname;
   afuc_opc opc;
   bool rep;

   afuc_get_opc(instr, &opc, &rep);

   /* Labels attached to this pc (jump-table names are printed separately
    * below, hence allow_jt=false here):
    */
   lname = label_name(pc, false);
   fname = fxn_name(pc);
   jump_label_idx = get_jump_table_entry(pc);

   if (jump_label_idx >= 0) {
      int j;
      printf("\n");
      /* One label line per packet id that dispatches to this pc: */
      for (j = 0; j < jump_labels[jump_label_idx].num_jump_labels; j++) {
         uint32_t jump_label = jump_labels[jump_label_idx].jump_labels[j];
         const char *name = getpm4(jump_label);
         if (name) {
            printlbl("%s", name);
         } else {
            printlbl("UNKN%d", jump_label);
         }
         printf(":\n");
      }
   }

   if (fname) {
      printlbl("%s", fname);
      printf(":\n");
   }

   if (lname) {
      printlbl(" %s", lname);
      printf(":");
   } else {
      printf("      ");
   }

   if (verbose) {
      printf("\t%04x: %08x  ", pc, instrs[pc]);
   } else {
      printf("  ");
   }

   switch (opc) {
   case OPC_NOP: {
      /* a6xx changed the default immediate, and apparently 0
       * is illegal now.
       */
      const uint32_t nop = gpuver >= 6 ? 0x1000000 : 0x0;
      if (instrs[pc] != nop) {
         printerr("[%08x]", instrs[pc]);
         printf("  ; ");
      }
      if (rep)
         printf("(rep)");
      printf("nop");
      print_gpu_reg(instrs[pc]);

      break;
   }
   /* ALU ops with an immediate second operand: */
   case OPC_ADD:
   case OPC_ADDHI:
   case OPC_SUB:
   case OPC_SUBHI:
   case OPC_AND:
   case OPC_OR:
   case OPC_XOR:
   case OPC_NOT:
   case OPC_SHL:
   case OPC_USHR:
   case OPC_ISHR:
   case OPC_ROT:
   case OPC_MUL8:
   case OPC_MIN:
   case OPC_MAX:
   case OPC_CMP: {
      bool src1 = true;

      /* "not" only has the immediate as its operand: */
      if (opc == OPC_NOT)
         src1 = false;

      if (rep)
         printf("(rep)");

      print_alu_name(opc, instrs[pc]);
      print_dst(instr->alui.dst);
      printf(", ");
      if (src1) {
         print_src(instr->alui.src);
         printf(", ");
      }
      printf("0x%04x", instr->alui.uimm);
      print_gpu_reg(instr->alui.uimm);

      /* print out unexpected bits: */
      if (verbose) {
         if (instr->alui.src && !src1)
            printerr("  (src=%02x)", instr->alui.src);
      }

      break;
   }
   case OPC_MOVI: {
      if (rep)
         printf("(rep)");
      printf("mov ");
      print_dst(instr->movi.dst);
      printf(", 0x%04x", instr->movi.uimm);
      if (instr->movi.shift)
         printf(" << %u", instr->movi.shift);

      /* A write to $addr with only the top byte set (ignoring b18) is
       * annotated as a pipe register:
       */
      if ((instr->movi.dst == REG_ADDR) && (instr->movi.shift >= 16)) {
         uint32_t val = (uint32_t)instr->movi.uimm << (uint32_t)instr->movi.shift;
         val &= ~0x40000;  /* b18 seems to be a flag */

         if ((val & 0x00ffffff) == 0) {
            printf("\t; ");
            print_pipe_reg(val >> 24);
            break;
         }
      }
      /* using mov w/ << 16 is popular way to construct a pkt7
       * header to send (for ex, from PFP to ME), so check that
       * case first
       */
      if ((instr->movi.shift == 16) &&
          ((instr->movi.uimm & 0xff00) == 0x7000)) {
         /* NOTE: this opc shadows the outer one; here it is the PM4
          * packet id taken from the immediate.
          */
         unsigned opc, p;

         opc = instr->movi.uimm & 0x7f;
         p = pm4_odd_parity_bit(opc);

         /* So, you'd think that checking the parity bit would be
          * a good way to rule out false positives, but seems like
          * ME doesn't really care.. at least it would filter out
          * things that look like actual legit packets between
          * PFP and ME..
          */
         if (1 || p == ((instr->movi.uimm >> 7) & 0x1)) {
            const char *name = getpm4(opc);
            printf("\t; ");
            if (name)
               printlbl("%s", name);
            else
               printlbl("UNKN%u", opc);
            break;
         }
      }

      print_gpu_reg((uint32_t)instr->movi.uimm << (uint32_t)instr->movi.shift);

      break;
   }
   /* ALU ops with two register operands; the actual operation is in
    * instr->alu.alu:
    */
   case OPC_ALU: {
      bool src1 = true;

      if (instr->alu.alu == OPC_NOT || instr->alu.alu == OPC_MSB)
         src1 = false;

      if (instr->alu.pad)
         printf("[%08x]  ; ", instrs[pc]);

      if (rep)
         printf("(rep)");
      if (instr->alu.xmov)
         printf("(xmov%d)", instr->alu.xmov);

      /* special case mnemonics:
       *   reading $00 seems to always yield zero, and so:
       *      or $dst, $00, $src -> mov $dst, $src
       *   Maybe add one for negate too, ie.
       *      sub $dst, $00, $src ???
       */
      if ((instr->alu.alu == OPC_OR) && !instr->alu.src1) {
         printf("mov ");
         src1 = false;
      } else {
         print_alu_name(instr->alu.alu, instrs[pc]);
      }

      print_dst(instr->alu.dst);
      if (src1) {
         printf(", ");
         print_src(instr->alu.src1);
      }
      printf(", ");
      print_src(instr->alu.src2);

      /* print out unexpected bits: */
      if (verbose) {
         if (instr->alu.pad)
            printerr("  (pad=%01x)", instr->alu.pad);
         if (instr->alu.src1 && !src1)
            printerr("  (src1=%02x)", instr->alu.src1);
      }

      /* xmov is a modifier that makes the processor execute up to 3
       * extra mov's after the current instruction. Given an ALU
       * instruction:
       *
       * (xmovN) alu $dst, $src1, $src2
       *
       * In all of the uses in the firmware blob, $dst and $src2 are one
       * of the "special" registers $data, $addr, $addr2. I've observed
       * that if $dst isn't "special" then it's replaced with $00
       * instead of $data, but I haven't checked what happens if $src2
       * isn't "special".  Anyway, in the usual case, the HW produces a
       * count M = min(N, $rem) and then does the following:
       *
       * M = 1:
       * mov $data, $src2
       *
       * M = 2:
       * mov $data, $src2
       * mov $data, $src2
       *
       * M = 3:
       * mov $data, $src2
       * mov $dst, $src2 (special case for CP_CONTEXT_REG_BUNCH)
       * mov $data, $src2
       *
       * It seems to be frequently used in combination with (rep) to
       * provide a kind of hardware-based loop unrolling, and there's
       * even a special case in the ISA to be able to do this with
       * CP_CONTEXT_REG_BUNCH. However (rep) isn't required.
       *
       * This dumps the expected extra instructions, assuming that $rem
       * isn't too small.
       */
      if (verbose && instr->alu.xmov) {
         for (int i = 0; i < instr->alu.xmov; i++) {
            printf("\n        ; mov ");
            if (instr->alu.dst < 0x1d)
               printf("$00");
            else if (instr->alu.xmov == 3 && i == 1)
               print_dst(instr->alu.dst);
            else
               printf("$data");
            printf(", ");
            print_src(instr->alu.src2);
         }
      }

      break;
   }
   /* Control register and memory accesses: */
   case OPC_CWRITE6:
   case OPC_CREAD6:
   case OPC_STORE6:
   case OPC_LOAD6: {
      if (rep)
         printf("(rep)");

      bool is_control_reg = true;
      bool is_store = true;
      if (gpuver >= 6) {
         switch (opc) {
         case OPC_CWRITE6:
            printf("cwrite ");
            break;
         case OPC_CREAD6:
            is_store = false;
            printf("cread ");
            break;
         case OPC_STORE6:
            is_control_reg = false;
            printf("store ");
            break;
         case OPC_LOAD6:
            is_control_reg = false;
            is_store = false;
            printf("load ");
            break;
         default:
            assert(!"unreachable");
         }
      } else {
         /* Before a6xx only the cwrite/cread encodings are accepted: */
         switch (opc) {
         case OPC_CWRITE5:
            printf("cwrite ");
            break;
         case OPC_CREAD5:
            is_store = false;
            printf("cread ");
            break;
         default:
            fprintf(stderr, "A6xx control opcode on A5xx?\n");
            exit(1);
         }
      }

      /* src1 is the value being written for stores, and the register
       * receiving the value for loads:
       */
      if (is_store)
         print_src(instr->control.src1);
      else
         print_dst(instr->control.src1);
      printf(", [");
      print_src(instr->control.src2);
      printf(" + ");
      if (is_control_reg && instr->control.flags != 0x4)
         print_control_reg(instr->control.uimm);
      else
         printf("0x%03x", instr->control.uimm);
      printf("], 0x%x", instr->control.flags);
      break;
   }
   case OPC_BRNEI:
   case OPC_BREQI:
   case OPC_BRNEB:
   case OPC_BREQB: {
      /* Branch offsets are relative to the branch instruction itself: */
      unsigned off = pc + instr->br.ioff;

      assert(!rep);

      /* Since $00 reads back zero, it can be used as src for
       * unconditional branches.  (This only really makes sense
       * for the BREQB.. or possible BRNEI if imm==0.)
       *
       * If bit=0 then branch is taken if *all* bits are zero.
       * Otherwise it is taken if bit (bit-1) is clear.
       *
       * Note the instruction after a jump/branch is executed
       * regardless of whether branch is taken, so use nop or
       * take that into account in code.
       */
      if (instr->br.src || (opc != OPC_BRNEB)) {
         bool immed = false;

         if (opc == OPC_BRNEI) {
            printf("brne ");
            immed = true;
         } else if (opc == OPC_BREQI) {
            printf("breq ");
            immed = true;
         } else if (opc == OPC_BRNEB) {
            printf("brne ");
         } else if (opc == OPC_BREQB) {
            printf("breq ");
         }
         print_src(instr->br.src);
         if (immed) {
            printf(", 0x%x,", instr->br.bit_or_imm);
         } else {
            printf(", b%u,", instr->br.bit_or_imm);
         }
      } else {
         /* BRNEB with src $00 is printed as an unconditional jump: */
         printf("jump");
         if (verbose && instr->br.bit_or_imm) {
            printerr("  (src=%03x, bit=%03x) ", instr->br.src,
                     instr->br.bit_or_imm);
         }
      }

      /* NOTE(review): label_name() returns NULL for a target without a
       * label; this relies on setup_labels() having covered this pc.
       */
      printf(" #");
      printlbl("%s", label_name(off, true));
      if (verbose)
         printf(" (#%d, %04x)", instr->br.ioff, off);
      break;
   }
   case OPC_CALL:
      assert(!rep);
      printf("call #");
      printlbl("%s", fxn_name(instr->call.uoff));
      if (verbose) {
         printf(" (%04x)", instr->call.uoff);
         /* The branch view of the encoding is used to spot unexpected
          * bits in a call:
          */
         if (instr->br.bit_or_imm || instr->br.src) {
            printerr("  (src=%03x, bit=%03x) ", instr->br.src,
                     instr->br.bit_or_imm);
         }
      }
      break;
   case OPC_RET:
      assert(!rep);
      if (instr->ret.pad)
         printf("[%08x]  ; ", instrs[pc]);
      if (instr->ret.interrupt)
         printf("iret");
      else
         printf("ret");
      break;
   case OPC_WIN:
      assert(!rep);
      if (instr->waitin.pad)
         printf("[%08x]  ; ", instrs[pc]);
      printf("waitin");
      if (verbose && instr->waitin.pad)
         printerr("  (pad=%x)", instr->waitin.pad);
      break;
   case OPC_PREEMPTLEAVE6:
      /* Only decoded on a6xx and later; otherwise shown as raw op38: */
      if (gpuver < 6) {
         printf("[%08x]  ; op38", instrs[pc]);
      } else {
         printf("preemptleave #");
         printlbl("%s", label_name(instr->call.uoff, true));
      }
      break;
   case OPC_SETSECURE:
      /* Note: This seems to implicitly read the secure/not-secure state
       * to set from the low bit of $02, and implicitly jumps to pc + 3
       * (i.e. skipping the next two instructions) if it succeeds. We
       * print these implicit parameters to make reading the disassembly
       * easier.
       */
      if (instr->pad)
         printf("[%08x]  ; ", instrs[pc]);
      printf("setsecure $02, #");
      printlbl("%s", label_name(pc + 3, true));
      break;
   default:
      /* Unknown opcode: dump the raw word plus a best-effort decode using
       * the ALU-immediate field layout.
       */
      printerr("[%08x]", instrs[pc]);
      printf("  ; op%02x ", opc);
      print_dst(instr->alui.dst);
      printf(", ");
      print_src(instr->alui.src);
      print_gpu_reg(instrs[pc] & 0xffff);
      break;
   }
   printf("\n");
}
710 
711 static void
setup_packet_table(uint32_t * jmptbl,uint32_t sizedwords)712 setup_packet_table(uint32_t *jmptbl, uint32_t sizedwords)
713 {
714    num_jump_labels = 0;
715 
716    for (unsigned i = 0; i < sizedwords; i++) {
717       unsigned offset = jmptbl[i];
718       unsigned n = i; // + CP_NOP;
719       add_jump_table_entry(n, offset);
720    }
721 }
722 
723 static void
setup_labels(uint32_t * instrs,uint32_t sizedwords)724 setup_labels(uint32_t *instrs, uint32_t sizedwords)
725 {
726    afuc_opc opc;
727    bool rep;
728 
729    num_label_offsets = 0;
730 
731    for (unsigned i = 0; i < sizedwords; i++) {
732       afuc_instr *instr = (void *)&instrs[i];
733 
734       afuc_get_opc(instr, &opc, &rep);
735 
736       switch (opc) {
737       case OPC_BRNEI:
738       case OPC_BREQI:
739       case OPC_BRNEB:
740       case OPC_BREQB:
741          label_idx(i + instr->br.ioff, true);
742          break;
743       case OPC_PREEMPTLEAVE6:
744          if (gpuver >= 6)
745             label_idx(instr->call.uoff, true);
746          break;
747       case OPC_CALL:
748          fxn_idx(instr->call.uoff, true);
749          break;
750       case OPC_SETSECURE:
751          /* this implicitly jumps to pc + 3 if successful */
752          label_idx(i + 3, true);
753          break;
754       default:
755          break;
756       }
757    }
758 }
759 
/*
 * Disassemble a firmware image described by `emu` (used by main() for a6xx
 * and later).
 *
 * The emulator is used to run the firmware's bootstrap code; afterwards
 * emu->jmptbl is used as the packet table and the LPAC instruction base
 * register tells whether a second (LPAC) image is appended.  If so, the
 * first image is printed up to the LPAC start, and the LPAC image is then
 * bootstrapped and printed separately.
 *
 * In emulator mode the function never gets to the plain listing: it steps
 * the emulator in a loop, printing each instruction at the emulated pc.
 *
 * Note that emu->instrs, emu->sizedwords and emu->lpac are modified when an
 * LPAC image is present.
 */
static void
disasm(struct emu *emu)
{
   uint32_t sizedwords = emu->sizedwords;
   uint32_t lpac_offset = 0;

   /* presumably declares the register handles passed to emu_get_reg64()
    * below -- confirm in emu.h
    */
   EMU_GPU_REG(CP_SQE_INSTR_BASE);
   EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE);

   emu_init(emu);

#ifdef BOOTSTRAP_DEBUG
   /* Debug aid: trace the bootstrap code itself, one instruction at a time */
   while (true) {
      disasm_instr(emu->instrs, emu->gpr_regs.pc);
      emu_step(emu);
   }
#endif

   emu_run_bootstrap(emu);

   /* Figure out if we have LPAC SQE appended: */
   if (emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE)) {
      lpac_offset = emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE) -
            emu_get_reg64(emu, &CP_SQE_INSTR_BASE);
      /* the difference is divided by 4 to get an offset in dwords */
      lpac_offset /= 4;
      sizedwords = lpac_offset;
   }

   /* NOTE(review): labels are collected over the whole image
    * (emu->sizedwords), not just the part before the LPAC offset
    * (sizedwords) -- confirm this is intended.
    */
   setup_packet_table(emu->jmptbl, ARRAY_SIZE(emu->jmptbl));
   setup_labels(emu->instrs, emu->sizedwords);

   /* TODO add option to emulate LPAC SQE instead: */
   if (emulator) {
      /* Start from clean slate: */
      emu_fini(emu);
      emu_init(emu);

      /* No exit condition here; the loop runs until the process is
       * terminated from elsewhere.
       */
      while (true) {
         disasm_instr(emu->instrs, emu->gpr_regs.pc);
         emu_step(emu);
      }
   }

   /* print instructions: */
   for (int i = 0; i < sizedwords; i++) {
      disasm_instr(emu->instrs, i);
   }

   if (!lpac_offset)
      return;

   printf(";\n");
   printf("; LPAC microcode:\n");
   printf(";\n");

   emu_fini(emu);

   /* Re-point the emulator at the appended LPAC image and redo the
    * bootstrap / table setup for it:
    */
   emu->lpac = true;
   emu->instrs += lpac_offset;
   emu->sizedwords -= lpac_offset;

   emu_init(emu);
   emu_run_bootstrap(emu);

   setup_packet_table(emu->jmptbl, ARRAY_SIZE(emu->jmptbl));
   setup_labels(emu->instrs, emu->sizedwords);

   /* print instructions: */
   for (int i = 0; i < emu->sizedwords; i++) {
      disasm_instr(emu->instrs, i);
   }
}
832 
833 
834 static void
disasm_legacy(uint32_t * buf,int sizedwords)835 disasm_legacy(uint32_t *buf, int sizedwords)
836 {
837    uint32_t *instrs = buf;
838    const int jmptbl_start = instrs[1] & 0xffff;
839    uint32_t *jmptbl = &buf[jmptbl_start];
840    int i;
841 
842    /* parse jumptable: */
843    setup_packet_table(jmptbl, 0x80);
844 
845    /* do a pre-pass to find instructions that are potential branch targets,
846     * and add labels for them:
847     */
848    setup_labels(instrs, jmptbl_start);
849 
850    /* print instructions: */
851    for (i = 0; i < jmptbl_start; i++) {
852       disasm_instr(instrs, i);
853    }
854 
855    /* print jumptable: */
856    if (verbose) {
857       printf(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
858       printf("; JUMP TABLE\n");
859       for (i = 0; i < 0x7f; i++) {
860          int n = i; // + CP_NOP;
861          uint32_t offset = jmptbl[i];
862          const char *name = getpm4(n);
863          printf("%3d %02x: ", n, n);
864          printf("%04x", offset);
865          if (name) {
866             printf("   ; %s", name);
867          } else {
868             printf("   ; UNKN%d", n);
869          }
870          printf("\n");
871       }
872    }
873 }
874 
/*
 * Print the command line help to stderr and exit with status 2.
 *
 * The text lists every option main() accepts.  The GPU version example
 * matches what main() requires (values below 500 are rejected).
 */
static void
usage(void)
{
   fprintf(stderr, "Usage:\n"
                   "\tdisasm [-g GPUVER] [-v] [-c] [-e] [-u] filename.fw\n"
                   "\t\t-g - specify GPU version (530, 630, etc)\n"
                   "\t\t-c - use colors\n"
                   "\t\t-v - verbose output\n"
                   "\t\t-e - emulator mode (a6xx only, implies -v)\n"
                   "\t\t-u - unit-test mode (do not print the input file name)\n");
   exit(2);
}
886 
887 int
main(int argc,char ** argv)888 main(int argc, char **argv)
889 {
890    uint32_t *buf;
891    char *file;
892    bool colors = false;
893    uint32_t gpu_id = 0;
894    size_t sz;
895    int c, ret;
896    bool unit_test = false;
897 
898    /* Argument parsing: */
899    while ((c = getopt(argc, argv, "g:vceu")) != -1) {
900       switch (c) {
901       case 'g':
902          gpu_id = atoi(optarg);
903          break;
904       case 'v':
905          verbose = true;
906          break;
907       case 'c':
908          colors = true;
909          break;
910       case 'e':
911          emulator = true;
912          verbose  = true;
913          break;
914       case 'u':
915          unit_test = true;
916          break;
917       default:
918          usage();
919       }
920    }
921 
922    if (optind >= argc) {
923       fprintf(stderr, "no file specified!\n");
924       usage();
925    }
926 
927    file = argv[optind];
928 
929    /* if gpu version not specified, infer from filename: */
930    if (!gpu_id) {
931       char *str = strstr(file, "a5");
932       if (!str)
933          str = strstr(file, "a6");
934       if (str)
935          gpu_id = atoi(str + 1);
936    }
937 
938    if (gpu_id < 500) {
939       printf("invalid gpu_id: %d\n", gpu_id);
940       return -1;
941    }
942 
943    gpuver = gpu_id / 100;
944 
945    /* a6xx is *mostly* a superset of a5xx, but some opcodes shuffle
946     * around, and behavior of special regs is a bit different.  Right
947     * now we only bother to support the a6xx variant.
948     */
949    if (emulator && (gpuver != 6)) {
950       fprintf(stderr, "Emulator only supported on a6xx!\n");
951       return 1;
952    }
953 
954    ret = afuc_util_init(gpuver, colors);
955    if (ret < 0) {
956       usage();
957    }
958 
959    printf("; a%dxx microcode\n", gpuver);
960 
961    buf = (uint32_t *)os_read_file(file, &sz);
962 
963    if (!unit_test)
964       printf("; Disassembling microcode: %s\n", file);
965    printf("; Version: %08x\n\n", buf[1]);
966 
967    if (gpuver < 6) {
968       disasm_legacy(&buf[1], sz / 4 - 1);
969    } else {
970       struct emu emu = {
971             .instrs = &buf[1],
972             .sizedwords = sz / 4 - 1,
973             .gpu_id = gpu_id,
974       };
975 
976       disasm(&emu);
977    }
978 
979    return 0;
980 }
981