1 /*
2  * Copyright © 2020 Google, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 /*
25  * Decoder for devcoredump traces from drm/msm.  In case of a gpu crash/hang,
26  * the coredump should be found in:
27  *
28  *    /sys/class/devcoredump/devcd<n>/data
29  *
30  * The crashdump will hang around for 5min, it can be cleared by writing to
31  * the file, ie:
32  *
33  *    echo 1 > /sys/class/devcoredump/devcd<n>/data
34  *
35  * (the driver won't log any new crashdumps until the previous one is cleared
36  * or times out after 5min)
37  */
38 
39 #include <assert.h>
40 #include <getopt.h>
41 #include <inttypes.h>
42 #include <stdarg.h>
43 #include <stdbool.h>
44 #include <stdint.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 
50 #include "freedreno_pm4.h"
51 
52 #include "ir3/instr-a3xx.h"
53 #include "buffers.h"
54 #include "cffdec.h"
55 #include "disasm.h"
56 #include "pager.h"
57 #include "rnnutil.h"
58 #include "util.h"
59 
/* Input stream the crashdump text is read from (consumed by popline()). */
static FILE *in;
/* When set, the decoders below emit additional, bulkier output. */
static bool verbose;

/* Register databases used by the decoders below.  They are loaded in
 * decode() once the GPU revision is known; dump_control_regs() treats a
 * NULL rnn_control as "not available".
 */
static struct rnn *rnn_gmu;     /* used by decode_gmu_registers() */
static struct rnn *rnn_control; /* used by dump_control_regs() */
static struct rnn *rnn_pipe;    /* used by dump_mem_pool_reg_write() */

/* Decoder options shared with cffdec; gpu_id is filled in from the
 * "revision:" line of the dump.
 */
static struct cffdec_options options = {
   .draw_filter = -1,
};
70 
71 static inline bool
is_a6xx(void)72 is_a6xx(void)
73 {
74    return (600 <= options.gpu_id) && (options.gpu_id < 700);
75 }
76 static inline bool
is_a5xx(void)77 is_a5xx(void)
78 {
79    return (500 <= options.gpu_id) && (options.gpu_id < 600);
80 }
81 static inline bool
is_64b(void)82 is_64b(void)
83 {
84    return options.gpu_id >= 500;
85 }
86 
87 /*
88  * Helpers to read register values:
89  */
90 
/* Read a register that is 64b wide on 64b GPUs (ie. a5xx+).
 *
 * The register is looked up by name; the lookup is asserted to yield a
 * non-zero offset.  On 64b GPUs the value at the following register offset
 * supplies the upper 32 bits, otherwise only the low dword is returned.
 */
static uint64_t
regval64(const char *name)
{
   const unsigned lo_reg = regbase(name);
   assert(lo_reg);

   const uint64_t lo = reg_val(lo_reg);
   if (!is_64b())
      return lo;

   const uint64_t hi = reg_val(lo_reg + 1);
   return lo | (hi << 32);
}
102 
/* Read a 32b register by name; the name lookup is asserted to yield a
 * non-zero offset.
 */
static uint32_t
regval(const char *name)
{
   const unsigned offset = regbase(name);

   assert(offset);

   return reg_val(offset);
}
110 
111 /*
112  * Line reading and string helpers:
113  */
114 
/*
 * Replace the first occurrence of `find` in `line` with `replace`.
 *
 * `line` must be heap allocated and ownership passes to this function: if
 * `find` does not occur, `line` itself is returned unchanged; otherwise a
 * newly allocated string is returned and `line` is freed.  Either way the
 * caller owns (and eventually frees) the returned pointer.
 *
 * Exits with an error if the replacement string cannot be allocated.
 */
static char *
replacestr(char *line, const char *find, const char *replace)
{
   char *match = strstr(line, find);
   if (!match)
      return line;

   const char *tail = match + strlen(find);
   const size_t prefix_len = (size_t)(match - line);
   const size_t replace_len = strlen(replace);
   const size_t tail_len = strlen(tail);

   /* Build the result by hand so the allocation result is checked and
    * the prefix length is not squeezed through an int.
    */
   char *newline = malloc(prefix_len + replace_len + tail_len + 1);
   if (!newline) {
      fprintf(stderr, "out of memory\n");
      exit(1);
   }

   memcpy(newline, line, prefix_len);
   memcpy(newline + prefix_len, replace, replace_len);
   /* +1 copies the terminating NUL along with the tail: */
   memcpy(newline + prefix_len + replace_len, tail, tail_len + 1);

   free(line);

   return newline;
}
131 
132 static char *lastline;
133 static char *pushedline;
134 
135 static const char *
popline(void)136 popline(void)
137 {
138    char *r = pushedline;
139 
140    if (r) {
141       pushedline = NULL;
142       return r;
143    }
144 
145    free(lastline);
146 
147    size_t n = 0;
148    if (getline(&r, &n, in) < 0)
149       exit(0);
150 
151    /* Handle section name typo's from earlier kernels: */
152    r = replacestr(r, "CP_MEMPOOOL", "CP_MEMPOOL");
153    r = replacestr(r, "CP_SEQ_STAT", "CP_SQE_STAT");
154 
155    lastline = r;
156    return r;
157 }
158 
159 static void
pushline(void)160 pushline(void)
161 {
162    assert(!pushedline);
163    pushedline = lastline;
164 }
165 
/*
 * Read the next input line and decode it as ascii85 into a newly allocated,
 * zero-initialized buffer of `sizedwords` dwords.
 *
 * 'z' decodes to a zero dword, any other group of up to five characters to
 * one dword.  Data shorter than `sizedwords` leaves the remainder zero;
 * data longer than `sizedwords` is truncated with a warning rather than
 * written past the end of the buffer.
 *
 * The returned buffer is never NULL and is owned by the caller.
 */
static uint32_t *
popline_ascii85(uint32_t sizedwords)
{
   const char *line = popline();

   /* At this point we expect the ascii85 data to be indented *some*
    * amount, and to terminate at the end of the line.  So just eat
    * up the leading whitespace.
    */
   assert(*line == ' ');
   while (*line == ' ')
      line++;

   /* Allocate at least one dword so that a zero-sized section still
    * yields a valid pointer.  calloc(nmemb, size) also rejects sizes
    * whose byte count would overflow.
    */
   uint32_t *buf = calloc(sizedwords ? sizedwords : 1, sizeof(*buf));
   if (!buf) {
      fprintf(stderr, "failed to allocate %" PRIu32 " dwords\n", sizedwords);
      exit(1);
   }

   uint32_t idx = 0;

   /* Stop at the newline, or at the terminator in case the final line
    * of the input is missing its newline.
    */
   while ((*line != '\n') && (*line != '\0')) {
      /* The data comes from the dump file, don't trust it to fit: */
      if (idx >= sizedwords) {
         fprintf(stderr, "ascii85 data exceeds %" PRIu32 " dwords, truncating\n",
                 sizedwords);
         break;
      }

      if (*line == 'z') {
         buf[idx++] = 0;
         line++;
         continue;
      }

      uint32_t accum = 0;
      for (int i = 0; (i < 5) && (*line != '\n') && (*line != '\0'); i++) {
         accum *= 85;
         accum += *line - '!';
         line++;
      }

      buf[idx++] = accum;
   }

   return buf;
}
201 
/*
 * Returns true if `line` begins with the string `start` (an empty `start`
 * always matches).
 *
 * Only the first strlen(start) characters are compared, instead of
 * searching the whole line for `start` as strstr() would.
 */
static bool
startswith(const char *line, const char *start)
{
   return strncmp(line, start, strlen(start)) == 0;
}
207 
/*
 * sscanf() wrapper that insists on a complete match.
 *
 * The number of assigning conversions in `fmt` is counted and compared
 * against what vsscanf() reports; on any mismatch an error is printed and
 * the process exits with status 1.  A literal "%%" and assignment-suppressed
 * conversions ("%*...") do not count, matching the value vsscanf() returns.
 */
static void
parseline(const char *line, const char *fmt, ...)
{
   int expected = 0;

   /* scan fmt string to extract expected # of conversions: */
   for (const char *p = fmt; *p; p++) {
      if (*p != '%')
         continue;

      if (p[1] == '%') {
         /* literal percent sign, skip over both characters */
         p++;
         continue;
      }

      if (p[1] == '*') {
         /* the conversion is performed but nothing is assigned */
         continue;
      }

      expected++;
   }

   va_list ap;
   va_start(ap, fmt);
   const int matched = vsscanf(line, fmt, ap);
   va_end(ap);

   if (matched != expected) {
      fprintf(stderr, "parse error scanning: '%s'\n", fmt);
      exit(1);
   }
}
236 
/*
 * Iterate over the lines belonging to the current section.
 *
 * `_line` is declared by the macro and holds each line in turn.  The
 * statement following the macro invocation is the loop body; it becomes
 * the `else` arm below, so `continue` and `break` inside it act on the
 * underlying for loop.  The first line that does not start with a space
 * marks the next section: it is handed back with pushline() and the loop
 * ends.
 */
#define foreach_line_in_section(_line)                                         \
   for (const char *_line = popline(); _line; _line = popline())               \
      /* check for start of next section */                                    \
      if (_line[0] != ' ') {                                                   \
         pushline();                                                           \
         break;                                                                \
      } else
244 
245 /*
246  * Decode ringbuffer section:
247  */
248 
249 static struct {
250    uint64_t iova;
251    uint32_t rptr;
252    uint32_t wptr;
253    uint32_t size;
254    uint32_t *buf;
255 } ringbuffers[5];
256 
257 static void
decode_ringbuffer(void)258 decode_ringbuffer(void)
259 {
260    int id = 0;
261 
262    foreach_line_in_section (line) {
263       if (startswith(line, "  - id:")) {
264          parseline(line, "  - id: %d", &id);
265          assert(id < ARRAY_SIZE(ringbuffers));
266       } else if (startswith(line, "    iova:")) {
267          parseline(line, "    iova: %" PRIx64, &ringbuffers[id].iova);
268       } else if (startswith(line, "    rptr:")) {
269          parseline(line, "    rptr: %d", &ringbuffers[id].rptr);
270       } else if (startswith(line, "    wptr:")) {
271          parseline(line, "    wptr: %d", &ringbuffers[id].wptr);
272       } else if (startswith(line, "    size:")) {
273          parseline(line, "    size: %d", &ringbuffers[id].size);
274       } else if (startswith(line, "    data: !!ascii85 |")) {
275          ringbuffers[id].buf = popline_ascii85(ringbuffers[id].size / 4);
276          add_buffer(ringbuffers[id].iova, ringbuffers[id].size,
277                     ringbuffers[id].buf);
278          continue;
279       }
280 
281       printf("%s", line);
282    }
283 }
284 
285 static bool
valid_header(uint32_t pkt)286 valid_header(uint32_t pkt)
287 {
288    if (options.gpu_id >= 500) {
289       return pkt_is_type4(pkt) || pkt_is_type7(pkt);
290    } else {
291       /* TODO maybe we can check validish looking pkt3 opc or pkt0
292        * register offset.. the cmds sent by kernel are usually
293        * fairly limited (other than initialization) which confines
294        * the search space a bit..
295        */
296       return true;
297    }
298 }
299 
300 static void
dump_cmdstream(void)301 dump_cmdstream(void)
302 {
303    uint64_t rb_base = regval64("CP_RB_BASE");
304 
305    printf("got rb_base=%" PRIx64 "\n", rb_base);
306 
307    options.ibs[1].base = regval64("CP_IB1_BASE");
308    options.ibs[1].rem = regval("CP_IB1_REM_SIZE");
309    options.ibs[2].base = regval64("CP_IB2_BASE");
310    options.ibs[2].rem = regval("CP_IB2_REM_SIZE");
311 
312    /* Adjust remaining size to account for cmdstream slurped into ROQ
313     * but not yet consumed by SQE
314     *
315     * TODO add support for earlier GPUs once we tease out the needed
316     * registers.. see crashit.c in msmtest for hints.
317     *
318     * TODO it would be nice to be able to extract out register bitfields
319     * by name rather than hard-coding this.
320     */
321    if (is_a6xx()) {
322       options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16;
323       options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16;
324    }
325 
326    printf("IB1: %" PRIx64 ", %u\n", options.ibs[1].base, options.ibs[1].rem);
327    printf("IB2: %" PRIx64 ", %u\n", options.ibs[2].base, options.ibs[2].rem);
328 
329    /* now that we've got the regvals we want, reset register state
330     * so we aren't seeing values from decode_registers();
331     */
332    reset_regs();
333 
334    for (int id = 0; id < ARRAY_SIZE(ringbuffers); id++) {
335       if (ringbuffers[id].iova != rb_base)
336          continue;
337       if (!ringbuffers[id].size)
338          continue;
339 
340       printf("found ring!\n");
341 
342       /* The kernel level ringbuffer (RB) wraps around, which
343        * cffdec doesn't really deal with.. so figure out how
344        * many dwords are unread
345        */
346       unsigned ringszdw = ringbuffers[id].size >> 2; /* in dwords */
347 
348       if (verbose) {
349          dump_commands(ringbuffers[id].buf, ringszdw, 0);
350          return;
351       }
352 
353 /* helper macro to deal with modulo size math: */
354 #define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw)
355 
356       /* The rptr will (most likely) have moved past the IB to
357        * userspace cmdstream, so back up a bit, and then advance
358        * until we find a valid start of a packet.. this is going
359        * to be less reliable on a4xx and before (pkt0/pkt3),
360        * compared to pkt4/pkt7 with parity bits
361        */
362       const int lookback = 12;
363       unsigned rptr = mod_add(ringbuffers[id].rptr, -lookback);
364 
365       for (int idx = 0; idx < lookback; idx++) {
366          if (valid_header(ringbuffers[id].buf[rptr]))
367             break;
368          rptr = mod_add(rptr, 1);
369       }
370 
371       unsigned cmdszdw = mod_add(ringbuffers[id].wptr, -rptr);
372 
373       printf("got cmdszdw=%d\n", cmdszdw);
374       uint32_t *buf = malloc(cmdszdw * 4);
375 
376       for (int idx = 0; idx < cmdszdw; idx++) {
377          int p = mod_add(rptr, idx);
378          buf[idx] = ringbuffers[id].buf[p];
379       }
380 
381       dump_commands(buf, cmdszdw, 0);
382       free(buf);
383    }
384 }
385 
386 /*
387  * Decode 'bos' (buffers) section:
388  */
389 
390 static void
decode_bos(void)391 decode_bos(void)
392 {
393    uint32_t size = 0;
394    uint64_t iova = 0;
395 
396    foreach_line_in_section (line) {
397       if (startswith(line, "  - iova:")) {
398          parseline(line, "  - iova: %" PRIx64, &iova);
399       } else if (startswith(line, "    size:")) {
400          parseline(line, "    size: %u", &size);
401       } else if (startswith(line, "    data: !!ascii85 |")) {
402          uint32_t *buf = popline_ascii85(size / 4);
403 
404          if (verbose)
405             dump_hex_ascii(buf, size, 1);
406 
407          add_buffer(iova, size, buf);
408 
409          continue;
410       }
411 
412       printf("%s", line);
413    }
414 }
415 
416 /*
417  * Decode registers section:
418  */
419 
420 static void
dump_register(struct rnn * rnn,uint32_t offset,uint32_t value)421 dump_register(struct rnn *rnn, uint32_t offset, uint32_t value)
422 {
423    struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset);
424    if (info && info->typeinfo) {
425       char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
426       printf("%s: %s\n", info->name, decoded);
427    } else if (info) {
428       printf("%s: %08x\n", info->name, value);
429    } else {
430       printf("<%04x>: %08x\n", offset, value);
431    }
432 }
433 
434 static void
decode_gmu_registers(void)435 decode_gmu_registers(void)
436 {
437    foreach_line_in_section (line) {
438       uint32_t offset, value;
439       parseline(line, "  - { offset: %x, value: %x }", &offset, &value);
440 
441       printf("\t%08x\t", value);
442       dump_register(rnn_gmu, offset / 4, value);
443    }
444 }
445 
446 static void
decode_registers(void)447 decode_registers(void)
448 {
449    foreach_line_in_section (line) {
450       uint32_t offset, value;
451       parseline(line, "  - { offset: %x, value: %x }", &offset, &value);
452 
453       reg_set(offset / 4, value);
454       printf("\t%08x", value);
455       dump_register_val(offset / 4, value, 0);
456    }
457 }
458 
459 /* similar to registers section, but for banked context regs: */
460 static void
decode_clusters(void)461 decode_clusters(void)
462 {
463    foreach_line_in_section (line) {
464       if (startswith(line, "  - cluster-name:") ||
465           startswith(line, "    - context:")) {
466          printf("%s", line);
467          continue;
468       }
469 
470       uint32_t offset, value;
471       parseline(line, "      - { offset: %x, value: %x }", &offset, &value);
472 
473       printf("\t%08x", value);
474       dump_register_val(offset / 4, value, 0);
475    }
476 }
477 
478 /*
479  * Decode indexed-registers.. these aren't like normal registers, but a
480  * sort of FIFO where successive reads pop out associated debug state.
481  */
482 
/*
 * Dump the CP_SQE_STAT indexed registers: the first dword is printed as the
 * PC, followed by 32 dwords printed as $01..$20 in two columns.  On a6xx,
 * if the first of those looks like a type7 packet header, the packet name
 * is printed too.
 *
 * Reads stat[0] through stat[32].
 */
static void
dump_cp_sqe_stat(uint32_t *stat)
{
   printf("\t PC: %04x\n", stat[0]);

   /* everything after the PC is indexed relative to the second dword: */
   const uint32_t *regs = stat + 1;
   const uint32_t hdr = regs[0];

   /* A valid header that isn't type7 is left undecoded (not sure if
    * that case can happen).
    */
   if (is_a6xx() && valid_header(hdr) && pkt_is_type7(hdr)) {
      const char *name = pktname(cp_type7_opcode(hdr));
      if (name)
         printf("\tPKT: %s\n", name);
   }

   for (int lo = 0; lo < 16; lo++) {
      const int hi = lo + 16;
      printf("\t$%02x: %08x\t\t$%02x: %08x\n", lo + 1, regs[lo], hi + 1,
             regs[hi]);
   }
}
505 
506 static void
dump_control_regs(uint32_t * regs)507 dump_control_regs(uint32_t *regs)
508 {
509    if (!rnn_control)
510       return;
511 
512    /* Control regs 0x100-0x17f are a scratch space to be used by the
513     * firmware however it wants, unlike lower regs which involve some
514     * fixed-function units. Therefore only these registers get dumped
515     * directly.
516     */
517    for (uint32_t i = 0; i < 0x80; i++) {
518       printf("\t%08x\t", regs[i]);
519       dump_register(rnn_control, i + 0x100, regs[i]);
520    }
521 }
522 
523 static void
dump_cp_ucode_dbg(uint32_t * dbg)524 dump_cp_ucode_dbg(uint32_t *dbg)
525 {
526    /* Notes on the data:
527     * There seems to be a section every 4096 DWORD's. The sections aren't
528     * all the same size, so the rest of the 4096 DWORD's are filled with
529     * mirrors of the actual data.
530     */
531 
532    for (int section = 0; section < 6; section++, dbg += 0x1000) {
533       switch (section) {
534       case 0:
535          /* Contains scattered data from a630_sqe.fw: */
536          printf("\tSQE instruction cache:\n");
537          dump_hex_ascii(dbg, 4 * 0x400, 1);
538          break;
539       case 1:
540          printf("\tUnknown 1:\n");
541          dump_hex_ascii(dbg, 4 * 0x80, 1);
542          break;
543       case 2:
544          printf("\tUnknown 2:\n");
545          dump_hex_ascii(dbg, 4 * 0x200, 1);
546          break;
547       case 3:
548          printf("\tUnknown 3:\n");
549          dump_hex_ascii(dbg, 4 * 0x80, 1);
550          break;
551       case 4:
552          /* Don't bother printing this normally */
553          if (verbose) {
554             printf("\tSQE packet jumptable contents:\n");
555             dump_hex_ascii(dbg, 4 * 0x80, 1);
556          }
557          break;
558       case 5:
559          printf("\tSQE scratch control regs:\n");
560          dump_control_regs(dbg);
561          break;
562       }
563    }
564 }
565 
566 static void
dump_mem_pool_reg_write(unsigned reg,uint32_t data,unsigned context,bool pipe)567 dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context,
568                         bool pipe)
569 {
570    if (pipe) {
571       struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg);
572       printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
573 
574       if (!strcmp(info->typeinfo->name, "void")) {
575          /* registers that ignore their payload */
576       } else {
577          printf("\t\t\t");
578          dump_register(rnn_pipe, reg, data);
579       }
580    } else {
581       printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
582       dump_register_val(reg, data, 2);
583    }
584 }
585 
/*
 * Decode one mem pool chunk (four dwords) and print the register writes it
 * holds.  A chunk carries up to two writes, each with its own enable bit.
 */
static void
dump_mem_pool_chunk(const uint32_t *chunk)
{
   /* Bit-level view of a chunk; the field widths add up to 128 bits. */
   struct __attribute__((packed)) {
      bool reg0_enabled : 1;
      bool reg1_enabled : 1;
      uint32_t data0 : 32;
      uint32_t data1 : 32;
      uint32_t reg0 : 18;
      uint32_t reg1 : 18;
      bool reg0_pipe : 1;
      bool reg1_pipe : 1;
      uint32_t reg0_context : 1;
      uint32_t reg1_context : 1;
      uint32_t padding : 22;
   } wr;

   /* copy rather than cast, the fields don't fall on dword boundaries */
   memcpy(&wr, chunk, sizeof(uint32_t[4]));

   if (wr.reg0_enabled)
      dump_mem_pool_reg_write(wr.reg0, wr.data0, wr.reg0_context,
                              wr.reg0_pipe);

   if (wr.reg1_enabled)
      dump_mem_pool_reg_write(wr.reg1, wr.data1, wr.reg1_context,
                              wr.reg1_pipe);
}
615 
616 static void
dump_cp_mem_pool(uint32_t * mempool)617 dump_cp_mem_pool(uint32_t *mempool)
618 {
619    /* The mem pool is a shared pool of memory used for storing in-flight
620     * register writes. There are 6 different queues, one for each
621     * cluster. Writing to $data (or for some special registers, $addr)
622     * pushes data onto the appropriate queue, and each queue is pulled
623     * from by the appropriate cluster. The queues are thus written to
624     * in-order, but may be read out-of-order.
625     *
626     * The queues are conceptually divided into 128-bit "chunks", and the
627     * read and write pointers are in units of chunks.  These chunks are
628     * organized internally into 8-chunk "blocks", and memory is allocated
629     * dynamically in terms of blocks. Each queue is represented as a
630     * singly-linked list of blocks, as well as 3-bit start/end chunk
631     * pointers that point within the first/last block.  The next pointers
632     * are located in a separate array, rather than inline.
633     */
634 
635    /* TODO: The firmware CP_MEM_POOL save/restore routines do something
636     * like:
637     *
638     * cread $02, [ $00 + 0 ]
639     * and $02, $02, 0x118
640     * ...
641     * brne $02, 0, #label
642     * mov $03, 0x2000
643     * mov $03, 0x1000
644     * label:
645     * ...
646     *
647     * I think that control register 0 is the GPU version, and some
648     * versions have a smaller mem pool. It seems some models have a mem
649     * pool that's half the size, and a bunch of offsets are shifted
650     * accordingly. Unfortunately the kernel driver's dumping code doesn't
651     * seem to take this into account, even the downstream android driver,
652     * and we don't know which versions 0x8, 0x10, or 0x100 correspond
653     * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
654     */
655    bool small_mem_pool = false;
656 
657    /* The array of next pointers for each block. */
658    const uint32_t *next_pointers =
659       small_mem_pool ? &mempool[0x800] : &mempool[0x1000];
660 
661    /* Maximum number of blocks in the pool, also the size of the pointers
662     * array.
663     */
664    const int num_blocks = small_mem_pool ? 0x30 : 0x80;
665 
666    /* Number of queues */
667    const unsigned num_queues = 6;
668 
669    /* Unfortunately the per-queue state is a little more complicated than
670     * a simple pair of begin/end pointers. Instead of a single beginning
671     * block, there are *two*, with the property that either the two are
672     * equal or the second is the "next" of the first. Similarly there are
673     * two end blocks. Thus the queue either looks like this:
674     *
675     * A -> B -> ... -> C -> D
676     *
677     * Or like this, or some combination:
678     *
679     * A/B -> ... -> C/D
680     *
681     * However, there's only one beginning/end chunk offset. Now the
682     * question is, which of A or B is the actual start? I.e. is the chunk
683     * offset an offset inside A or B? It depends. I'll show a typical read
684     * cycle, starting here (read pointer marked with a *) with a chunk
685     * offset of 0:
686     *
687     *	  A                    B
688     *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
689     * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
690     *
691     * Once the pointer advances far enough, the hardware decides to free
692     * A, after which the read-side state looks like:
693     *
694     *	(free)                A/B
695     *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
696     * |_|_|_|_|_|_|_|_|    |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
697     *
698     * Then after advancing the pointer a bit more, the hardware fetches
699     * the "next" pointer for A and stores it in B:
700     *
701     *	(free)                 A                     B
702     *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
703     * |_|_|_|_|_|_|_|_|    |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
704     *
705     * Then the read pointer advances into B, at which point we've come
706     * back to the first state having advanced a whole block:
707     *
708     *	(free)                 A                     B
709     *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
710     * |_|_|_|_|_|_|_|_|    |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
711     *
712     *
713     * There is a similar cycle for the write pointer. Now, the question
714     * is, how do we know which state we're in? We need to know this to
715     * know whether the pointer (*) is in A or B if they're different. It
716     * seems like there should be some bit somewhere describing this, but
717     * after lots of experimentation I've come up empty-handed. For now we
718     * assume that if the pointer is in the first half, then we're in
719     * either the first or second state and use B, and otherwise we're in
720     * the second or third state and use A. So far I haven't seen anything
721     * that violates this assumption.
722     */
723 
724    struct {
725       uint32_t unk0;
726       uint32_t padding0[7]; /* Mirrors of unk0 */
727 
728       struct {
729          uint32_t chunk : 3;
730          uint32_t first_block : 32 - 3;
731       } writer[6];
732       uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */
733 
734       uint32_t unk1;
735       uint32_t padding2[7]; /* Mirrors of unk1 */
736 
737       uint32_t writer_second_block[6];
738       uint32_t padding3[2];
739 
740       uint32_t unk2[6];
741       uint32_t padding4[2];
742 
743       struct {
744          uint32_t chunk : 3;
745          uint32_t first_block : 32 - 3;
746       } reader[6];
747       uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */
748 
749       uint32_t unk3;
750       uint32_t padding6[7]; /* Mirrors of unk3 */
751 
752       uint32_t reader_second_block[6];
753       uint32_t padding7[2];
754 
755       uint32_t block_count[6];
756       uint32_t padding[2];
757 
758       uint32_t unk4;
759       uint32_t padding9[7]; /* Mirrors of unk4 */
760    } data1;
761 
762    const uint32_t *data1_ptr =
763       small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
764    memcpy(&data1, data1_ptr, sizeof(data1));
765 
766    /* Based on the kernel, the first dword is the mem pool size (in
767     * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
768     */
769    const uint32_t *data2_ptr =
770       small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
771    const int data2_size = 0x60;
772 
773    /* This seems to be the size of each queue in chunks. */
774    const uint32_t *queue_sizes = &data2_ptr[0x18];
775 
776    printf("\tdata2:\n");
777    dump_hex_ascii(data2_ptr, 4 * data2_size, 1);
778 
779    /* These seem to be some kind of counter of allocated/deallocated blocks */
780    if (verbose) {
781       printf("\tunk0: %x\n", data1.unk0);
782       printf("\tunk1: %x\n", data1.unk1);
783       printf("\tunk3: %x\n", data1.unk3);
784       printf("\tunk4: %x\n\n", data1.unk4);
785    }
786 
787    for (int queue = 0; queue < num_queues; queue++) {
788       const char *cluster_names[6] = {"FE",   "SP_VS", "PC_VS",
789                                       "GRAS", "SP_PS", "PS"};
790       printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);
791 
792       if (verbose) {
793          printf("\t\twriter_first_block: 0x%x\n",
794                 data1.writer[queue].first_block);
795          printf("\t\twriter_second_block: 0x%x\n",
796                 data1.writer_second_block[queue]);
797          printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
798          printf("\t\treader_first_block: 0x%x\n",
799                 data1.reader[queue].first_block);
800          printf("\t\treader_second_block: 0x%x\n",
801                 data1.reader_second_block[queue]);
802          printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
803          printf("\t\tblock_count: %d\n", data1.block_count[queue]);
804          printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
805          printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
806       }
807 
808       uint32_t cur_chunk = data1.reader[queue].chunk;
809       uint32_t cur_block = cur_chunk > 3 ? data1.reader[queue].first_block
810                                          : data1.reader_second_block[queue];
811       uint32_t last_chunk = data1.writer[queue].chunk;
812       uint32_t last_block = last_chunk > 3 ? data1.writer[queue].first_block
813                                            : data1.writer_second_block[queue];
814 
815       if (verbose)
816          printf("\tblock %x\n", cur_block);
817       if (cur_block >= num_blocks) {
818          fprintf(stderr, "block %x too large\n", cur_block);
819          exit(1);
820       }
821       unsigned calculated_queue_size = 0;
822       while (cur_block != last_block || cur_chunk != last_chunk) {
823          calculated_queue_size++;
824          uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];
825 
826          dump_mem_pool_chunk(chunk_ptr);
827 
828          printf("\t%05x: %08x %08x %08x %08x\n",
829                 4 * (cur_block * 0x20 + cur_chunk + 4), chunk_ptr[0],
830                 chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);
831 
832          cur_chunk++;
833          if (cur_chunk == 8) {
834             cur_block = next_pointers[cur_block];
835             if (verbose)
836                printf("\tblock %x\n", cur_block);
837             if (cur_block >= num_blocks) {
838                fprintf(stderr, "block %x too large\n", cur_block);
839                exit(1);
840             }
841             cur_chunk = 0;
842          }
843       }
844       if (calculated_queue_size != queue_sizes[queue]) {
845          printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n",
846                 calculated_queue_size);
847       }
848       printf("\n");
849    }
850 }
851 
852 static void
decode_indexed_registers(void)853 decode_indexed_registers(void)
854 {
855    char *name = NULL;
856    uint32_t sizedwords = 0;
857 
858    foreach_line_in_section (line) {
859       if (startswith(line, "  - regs-name:")) {
860          free(name);
861          parseline(line, "  - regs-name: %ms", &name);
862       } else if (startswith(line, "    dwords:")) {
863          parseline(line, "    dwords: %u", &sizedwords);
864       } else if (startswith(line, "    data: !!ascii85 |")) {
865          uint32_t *buf = popline_ascii85(sizedwords);
866 
867          /* some of the sections are pretty large, and are (at least
868           * so far) not useful, so skip them if not in verbose mode:
869           */
870          bool dump = verbose || !strcmp(name, "CP_SQE_STAT") ||
871                      !strcmp(name, "CP_DRAW_STATE") ||
872                      !strcmp(name, "CP_ROQ") || 0;
873 
874          if (!strcmp(name, "CP_SQE_STAT"))
875             dump_cp_sqe_stat(buf);
876 
877          if (!strcmp(name, "CP_UCODE_DBG_DATA"))
878             dump_cp_ucode_dbg(buf);
879 
880          if (!strcmp(name, "CP_MEMPOOL"))
881             dump_cp_mem_pool(buf);
882 
883          if (dump)
884             dump_hex_ascii(buf, 4 * sizedwords, 1);
885 
886          free(buf);
887 
888          continue;
889       }
890 
891       printf("%s", line);
892    }
893 }
894 
895 /*
896  * Decode shader-blocks:
897  */
898 
899 static void
decode_shader_blocks(void)900 decode_shader_blocks(void)
901 {
902    char *type = NULL;
903    uint32_t sizedwords = 0;
904 
905    foreach_line_in_section (line) {
906       if (startswith(line, "  - type:")) {
907          free(type);
908          parseline(line, "  - type: %ms", &type);
909       } else if (startswith(line, "      size:")) {
910          parseline(line, "      size: %u", &sizedwords);
911       } else if (startswith(line, "    data: !!ascii85 |")) {
912          uint32_t *buf = popline_ascii85(sizedwords);
913 
914          /* some of the sections are pretty large, and are (at least
915           * so far) not useful, so skip them if not in verbose mode:
916           */
917          bool dump = verbose || !strcmp(type, "A6XX_SP_INST_DATA") ||
918                      !strcmp(type, "A6XX_HLSQ_INST_RAM") || 0;
919 
920          if (!strcmp(type, "A6XX_SP_INST_DATA") ||
921              !strcmp(type, "A6XX_HLSQ_INST_RAM")) {
922             /* TODO this section actually contains multiple shaders
923              * (or parts of shaders?), so perhaps we should search
924              * for ends of shaders and decode each?
925              */
926             try_disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id);
927          }
928 
929          if (dump)
930             dump_hex_ascii(buf, 4 * sizedwords, 1);
931 
932          free(buf);
933 
934          continue;
935       }
936 
937       printf("%s", line);
938    }
939 
940    free(type);
941 }
942 
943 /*
944  * Decode debugbus section:
945  */
946 
947 static void
decode_debugbus(void)948 decode_debugbus(void)
949 {
950    char *block = NULL;
951    uint32_t sizedwords = 0;
952 
953    foreach_line_in_section (line) {
954       if (startswith(line, "  - debugbus-block:")) {
955          free(block);
956          parseline(line, "  - debugbus-block: %ms", &block);
957       } else if (startswith(line, "    count:")) {
958          parseline(line, "    count: %u", &sizedwords);
959       } else if (startswith(line, "    data: !!ascii85 |")) {
960          uint32_t *buf = popline_ascii85(sizedwords);
961 
962          /* some of the sections are pretty large, and are (at least
963           * so far) not useful, so skip them if not in verbose mode:
964           */
965          bool dump = verbose || 0;
966 
967          if (dump)
968             dump_hex_ascii(buf, 4 * sizedwords, 1);
969 
970          free(buf);
971 
972          continue;
973       }
974 
975       printf("%s", line);
976    }
977 }
978 
979 /*
980  * Main crashdump decode loop:
981  */
982 
983 static void
decode(void)984 decode(void)
985 {
986    const char *line;
987 
988    while ((line = popline())) {
989       printf("%s", line);
990       if (startswith(line, "revision:")) {
991          parseline(line, "revision: %u", &options.gpu_id);
992          printf("Got gpu_id=%u\n", options.gpu_id);
993 
994          cffdec_init(&options);
995 
996          if (is_a6xx()) {
997             rnn_gmu = rnn_new(!options.color);
998             rnn_load_file(rnn_gmu, "adreno/a6xx_gmu.xml", "A6XX");
999             rnn_control = rnn_new(!options.color);
1000             rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml",
1001                           "A6XX_CONTROL_REG");
1002             rnn_pipe = rnn_new(!options.color);
1003             rnn_load_file(rnn_pipe, "adreno/adreno_pipe_regs.xml",
1004                           "A6XX_PIPE_REG");
1005          } else if (is_a5xx()) {
1006             rnn_control = rnn_new(!options.color);
1007             rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml",
1008                           "A5XX_CONTROL_REG");
1009          } else {
1010             rnn_control = NULL;
1011          }
1012       } else if (startswith(line, "bos:")) {
1013          decode_bos();
1014       } else if (startswith(line, "ringbuffer:")) {
1015          decode_ringbuffer();
1016       } else if (startswith(line, "registers:")) {
1017          decode_registers();
1018 
1019          /* after we've recorded buffer contents, and CP register values,
1020           * we can take a stab at decoding the cmdstream:
1021           */
1022          dump_cmdstream();
1023       } else if (startswith(line, "registers-gmu:")) {
1024          decode_gmu_registers();
1025       } else if (startswith(line, "indexed-registers:")) {
1026          decode_indexed_registers();
1027       } else if (startswith(line, "shader-blocks:")) {
1028          decode_shader_blocks();
1029       } else if (startswith(line, "clusters:")) {
1030          decode_clusters();
1031       } else if (startswith(line, "debugbus:")) {
1032          decode_debugbus();
1033       }
1034    }
1035 }
1036 
1037 /*
1038  * Usage and argument parsing:
1039  */
1040 
/* Print the command line help text to stderr and terminate the process
 * with exit status 2.  Does not return.
 */
static void
usage(void)
{
   /* clang-format off */
   static const char help_text[] =
      "Usage:\n\n"
      "\tcrashdec [-achmsv] [-f FILE]\n\n"
      "Options:\n"
      "\t-a, --allregs   - show all registers (including ones not written since\n"
      "\t                  previous draw) at each draw\n"
      "\t-c, --color     - use colors\n"
      "\t-f, --file=FILE - read input from specified file (rather than stdin)\n"
      "\t-h, --help      - this usage message\n"
      "\t-m, --markers   - try to decode CP_NOP string markers\n"
      "\t-s, --summary   - don't show individual register writes, but just show\n"
      "\t                  register values on draws\n"
      "\t-v, --verbose   - dump more verbose output, including contents of\n"
      "\t                  less interesting buffers\n"
      "\n";
   /* clang-format on */

   fputs(help_text, stderr);
   exit(2);
}
1063 
1064 /* clang-format off */
/* Long-option table for getopt_long(); each entry maps onto the short
 * option character of the same meaning.  The final all-zero entry marks
 * the end of the table.
 */
static const struct option opts[] = {
      { .name = "allregs", .has_arg = no_argument,       .flag = NULL, .val = 'a' },
      { .name = "color",   .has_arg = no_argument,       .flag = NULL, .val = 'c' },
      { .name = "file",    .has_arg = required_argument, .flag = NULL, .val = 'f' },
      { .name = "help",    .has_arg = no_argument,       .flag = NULL, .val = 'h' },
      { .name = "markers", .has_arg = no_argument,       .flag = NULL, .val = 'm' },
      { .name = "summary", .has_arg = no_argument,       .flag = NULL, .val = 's' },
      { .name = "verbose", .has_arg = no_argument,       .flag = NULL, .val = 'v' },
      { .name = NULL,      .has_arg = 0,                 .flag = NULL, .val = 0   }
};
1075 /* clang-format on */
1076 
/* Set by main() from isatty(STDOUT_FILENO); decides whether the pager is
 * opened at startup and closed again in cleanup().
 */
static bool interactive;

/**
 * Flush pending stdout output and, in interactive mode, close the pager.
 *
 * main() both registers this function with atexit() and calls it
 * directly, so on a normal run it is invoked twice.  Only the first call
 * does any work, which keeps pager_close() from being run a second time
 * (pager_close() is not visible here, so do not rely on it tolerating
 * repeated calls).
 */
static void
cleanup(void)
{
   static bool already_done;

   if (already_done)
      return;
   already_done = true;

   fflush(stdout);

   if (interactive) {
      pager_close();
   }
}
1088 
1089 int
main(int argc,char ** argv)1090 main(int argc, char **argv)
1091 {
1092    int c;
1093 
1094    interactive = isatty(STDOUT_FILENO);
1095    options.color = interactive;
1096 
1097    /* default to read from stdin: */
1098    in = stdin;
1099 
1100    while ((c = getopt_long(argc, argv, "acf:hmsv", opts, NULL)) != -1) {
1101       switch (c) {
1102       case 'a':
1103          options.allregs = true;
1104          break;
1105       case 'c':
1106          options.color = true;
1107          break;
1108       case 'f':
1109          in = fopen(optarg, "r");
1110          break;
1111       case 'm':
1112          options.decode_markers = true;
1113          break;
1114       case 's':
1115          options.summary = true;
1116          break;
1117       case 'v':
1118          verbose = true;
1119          break;
1120       case 'h':
1121       default:
1122          usage();
1123       }
1124    }
1125 
1126    disasm_a3xx_set_debug(PRINT_RAW);
1127 
1128    if (interactive) {
1129       pager_open();
1130    }
1131 
1132    atexit(cleanup);
1133 
1134    decode();
1135    cleanup();
1136 }
1137