1 /*
2 * Copyright © 2020 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 /*
25 * Decoder for devcoredump traces from drm/msm. In case of a gpu crash/hang,
26 * the coredump should be found in:
27 *
28 * /sys/class/devcoredump/devcd<n>/data
29 *
30 * The crashdump will hang around for 5min, it can be cleared by writing to
31 * the file, ie:
32 *
33 * echo 1 > /sys/class/devcoredump/devcd<n>/data
34 *
35 * (the driver won't log any new crashdumps until the previous one is cleared
36 * or times out after 5min)
37 */
38
39 #include <assert.h>
40 #include <getopt.h>
41 #include <inttypes.h>
42 #include <stdarg.h>
43 #include <stdbool.h>
44 #include <stdint.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49
50 #include "freedreno_pm4.h"
51
52 #include "ir3/instr-a3xx.h"
53 #include "buffers.h"
54 #include "cffdec.h"
55 #include "disasm.h"
56 #include "pager.h"
57 #include "rnnutil.h"
58 #include "util.h"
59
/* Input stream the crashdump is read from; main() defaults it to stdin and
 * replaces it when -f/--file is given.
 */
static FILE *in;
/* Set by -v/--verbose; enables dumping of the less interesting sections. */
static bool verbose;

/* Register databases, created in decode() once the "revision:" line has
 * been parsed (which ones get loaded depends on the GPU generation).
 */
static struct rnn *rnn_gmu;
static struct rnn *rnn_control;
static struct rnn *rnn_pipe;

/* Decoder options; also handed to cffdec_init() in decode(). */
static struct cffdec_options options = {
   .draw_filter = -1,
};
70
71 static inline bool
is_a6xx(void)72 is_a6xx(void)
73 {
74 return (600 <= options.gpu_id) && (options.gpu_id < 700);
75 }
76 static inline bool
is_a5xx(void)77 is_a5xx(void)
78 {
79 return (500 <= options.gpu_id) && (options.gpu_id < 600);
80 }
81 static inline bool
is_64b(void)82 is_64b(void)
83 {
84 return options.gpu_id >= 500;
85 }
86
87 /*
88 * Helpers to read register values:
89 */
90
/* Read a register by name.  On 64b GPUs (ie. a5xx+) the register that
 * follows it supplies the upper 32 bits of the returned value.
 */
static uint64_t
regval64(const char *name)
{
   unsigned lo_reg = regbase(name);
   assert(lo_reg);

   uint64_t lo = reg_val(lo_reg);
   if (!is_64b())
      return lo;

   uint64_t hi = reg_val(lo_reg + 1);
   return lo | (hi << 32);
}
102
/* Read a 32-bit register by name; asserts that the name resolves to a
 * non-zero register offset.
 */
static uint32_t
regval(const char *name)
{
   unsigned offset = regbase(name);

   assert(offset);

   uint32_t value = reg_val(offset);
   return value;
}
110
111 /*
112 * Line reading and string helpers:
113 */
114
/* Replace the first occurrence of 'find' in 'line' with 'replace'.
 *
 * 'line' must be heap allocated.  When 'find' does not occur, 'line' itself
 * is returned untouched.  Otherwise 'line' is freed and a newly allocated
 * string is returned, which the caller then owns.
 *
 * Allocation failure is fatal (message on stderr, exit status 1) rather than
 * returning an indeterminate pointer.
 */
static char *
replacestr(char *line, const char *find, const char *replace)
{
   char *match = strstr(line, find);
   if (!match)
      return line;

   const char *tail = match + strlen(find);
   size_t headlen = (size_t)(match - line);
   size_t replen = strlen(replace);
   size_t taillen = strlen(tail);

   char *newline = malloc(headlen + replen + taillen + 1);
   if (!newline) {
      fprintf(stderr, "out of memory\n");
      exit(1);
   }

   memcpy(newline, line, headlen);
   memcpy(newline + headlen, replace, replen);
   /* +1 copies the terminating NUL along with the tail */
   memcpy(newline + headlen + replen, tail, taillen + 1);

   free(line);

   return newline;
}
131
132 static char *lastline;
133 static char *pushedline;
134
135 static const char *
popline(void)136 popline(void)
137 {
138 char *r = pushedline;
139
140 if (r) {
141 pushedline = NULL;
142 return r;
143 }
144
145 free(lastline);
146
147 size_t n = 0;
148 if (getline(&r, &n, in) < 0)
149 exit(0);
150
151 /* Handle section name typo's from earlier kernels: */
152 r = replacestr(r, "CP_MEMPOOOL", "CP_MEMPOOL");
153 r = replacestr(r, "CP_SEQ_STAT", "CP_SQE_STAT");
154
155 lastline = r;
156 return r;
157 }
158
159 static void
pushline(void)160 pushline(void)
161 {
162 assert(!pushedline);
163 pushedline = lastline;
164 }
165
/* Read the next line and decode it as ascii85 data.
 *
 * Returns a zero-initialised, heap-allocated buffer with room for
 * 'sizedwords' dwords (at least one), owned by the caller.  'z' decodes to a
 * zero dword; any other group of up to five characters decodes to one dword.
 *
 * Decoding stops at the end of the line, or with a warning on stderr once
 * 'sizedwords' dwords have been produced, so that malformed input cannot
 * write past the end of the buffer.  Allocation failure is fatal.
 */
static uint32_t *
popline_ascii85(uint32_t sizedwords)
{
   const char *line = popline();

   /* At this point we expect the ascii85 data to be indented *some*
    * amount, and to terminate at the end of the line.  So just eat
    * up the leading whitespace.
    */
   assert(*line == ' ');
   while (*line == ' ')
      line++;

   /* calloc() checks the count * size multiplication for overflow */
   uint32_t *buf = calloc(sizedwords ? sizedwords : 1, sizeof(*buf));
   if (!buf) {
      fprintf(stderr, "out of memory\n");
      exit(1);
   }

   uint32_t idx = 0;

   /* Also stop on NUL: the last line of a file may lack a newline. */
   while (*line != '\n' && *line != '\0') {
      if (idx >= sizedwords) {
         fprintf(stderr, "ascii85 data exceeds declared size of %u dwords\n",
                 sizedwords);
         break;
      }

      if (*line == 'z') {
         buf[idx++] = 0;
         line++;
         continue;
      }

      uint32_t accum = 0;
      for (int i = 0; (i < 5) && (*line != '\n') && (*line != '\0'); i++) {
         accum *= 85;
         accum += *line - '!';
         line++;
      }

      buf[idx++] = accum;
   }

   return buf;
}
201
/* Return true when 'line' begins with the prefix 'start' (an empty prefix
 * always matches).  Only the first strlen(start) characters are examined.
 */
static bool
startswith(const char *line, const char *start)
{
   return strncmp(line, start, strlen(start)) == 0;
}
207
/* sscanf() wrapper that insists on every conversion in 'fmt' being matched.
 *
 * The number of expected assignments is derived from 'fmt': each '%' starts
 * a conversion, except for "%%" (a literal percent sign) and "%*..."
 * (assignment suppressed), neither of which contributes to the value
 * returned by vsscanf().  On a mismatch an error naming the format is
 * printed to stderr and the process exits with status 1.
 */
static void
parseline(const char *line, const char *fmt, ...)
{
   int expected = 0;

   /* scan fmt string to extract expected # of conversions: */
   for (const char *p = fmt; *p; p++) {
      if (*p != '%')
         continue;

      p++; /* look at the character following the '%' */
      if (*p == '\0')
         break; /* dangling '%' at the end of fmt */
      if (*p == '%' || *p == '*')
         continue; /* literal '%' or suppressed assignment */

      expected++;
   }

   va_list ap;
   va_start(ap, fmt);
   int matched = vsscanf(line, fmt, ap);
   va_end(ap);

   if (matched != expected) {
      fprintf(stderr, "parse error scanning: '%s'\n", fmt);
      exit(1);
   }
}
236
/* Iterate over the lines of the current section.  The loop ends at the first
 * line that does not begin with a space: that line is pushed back (so the
 * next popline() returns it again) before breaking out.  The statement that
 * follows the macro invocation becomes the body of the trailing 'else', so
 * 'continue' inside it advances to the next line.
 */
#define foreach_line_in_section(_line)                                       \
   for (const char *_line = popline(); _line; _line = popline())            \
      /* check for start of next section */                                 \
      if (_line[0] != ' ') {                                                \
         pushline();                                                        \
         break;                                                             \
      } else
244
245 /*
246 * Decode ringbuffer section:
247 */
248
/* Per-ring state collected by decode_ringbuffer(), indexed by the ring id
 * found in the dump, and consumed by dump_cmdstream().
 */
static struct {
   uint64_t iova; /* compared against CP_RB_BASE to find the active ring */
   uint32_t rptr; /* read pointer; used as a dword index by dump_cmdstream() */
   uint32_t wptr; /* write pointer; used as a dword index by dump_cmdstream() */
   uint32_t size; /* ring size in bytes (converted to dwords with >> 2) */
   uint32_t *buf; /* ring contents decoded by popline_ascii85() */
} ringbuffers[5];
256
257 static void
decode_ringbuffer(void)258 decode_ringbuffer(void)
259 {
260 int id = 0;
261
262 foreach_line_in_section (line) {
263 if (startswith(line, " - id:")) {
264 parseline(line, " - id: %d", &id);
265 assert(id < ARRAY_SIZE(ringbuffers));
266 } else if (startswith(line, " iova:")) {
267 parseline(line, " iova: %" PRIx64, &ringbuffers[id].iova);
268 } else if (startswith(line, " rptr:")) {
269 parseline(line, " rptr: %d", &ringbuffers[id].rptr);
270 } else if (startswith(line, " wptr:")) {
271 parseline(line, " wptr: %d", &ringbuffers[id].wptr);
272 } else if (startswith(line, " size:")) {
273 parseline(line, " size: %d", &ringbuffers[id].size);
274 } else if (startswith(line, " data: !!ascii85 |")) {
275 ringbuffers[id].buf = popline_ascii85(ringbuffers[id].size / 4);
276 add_buffer(ringbuffers[id].iova, ringbuffers[id].size,
277 ringbuffers[id].buf);
278 continue;
279 }
280
281 printf("%s", line);
282 }
283 }
284
285 static bool
valid_header(uint32_t pkt)286 valid_header(uint32_t pkt)
287 {
288 if (options.gpu_id >= 500) {
289 return pkt_is_type4(pkt) || pkt_is_type7(pkt);
290 } else {
291 /* TODO maybe we can check validish looking pkt3 opc or pkt0
292 * register offset.. the cmds sent by kernel are usually
293 * fairly limited (other than initialization) which confines
294 * the search space a bit..
295 */
296 return true;
297 }
298 }
299
300 static void
dump_cmdstream(void)301 dump_cmdstream(void)
302 {
303 uint64_t rb_base = regval64("CP_RB_BASE");
304
305 printf("got rb_base=%" PRIx64 "\n", rb_base);
306
307 options.ibs[1].base = regval64("CP_IB1_BASE");
308 options.ibs[1].rem = regval("CP_IB1_REM_SIZE");
309 options.ibs[2].base = regval64("CP_IB2_BASE");
310 options.ibs[2].rem = regval("CP_IB2_REM_SIZE");
311
312 /* Adjust remaining size to account for cmdstream slurped into ROQ
313 * but not yet consumed by SQE
314 *
315 * TODO add support for earlier GPUs once we tease out the needed
316 * registers.. see crashit.c in msmtest for hints.
317 *
318 * TODO it would be nice to be able to extract out register bitfields
319 * by name rather than hard-coding this.
320 */
321 if (is_a6xx()) {
322 options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16;
323 options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16;
324 }
325
326 printf("IB1: %" PRIx64 ", %u\n", options.ibs[1].base, options.ibs[1].rem);
327 printf("IB2: %" PRIx64 ", %u\n", options.ibs[2].base, options.ibs[2].rem);
328
329 /* now that we've got the regvals we want, reset register state
330 * so we aren't seeing values from decode_registers();
331 */
332 reset_regs();
333
334 for (int id = 0; id < ARRAY_SIZE(ringbuffers); id++) {
335 if (ringbuffers[id].iova != rb_base)
336 continue;
337 if (!ringbuffers[id].size)
338 continue;
339
340 printf("found ring!\n");
341
342 /* The kernel level ringbuffer (RB) wraps around, which
343 * cffdec doesn't really deal with.. so figure out how
344 * many dwords are unread
345 */
346 unsigned ringszdw = ringbuffers[id].size >> 2; /* in dwords */
347
348 if (verbose) {
349 dump_commands(ringbuffers[id].buf, ringszdw, 0);
350 return;
351 }
352
353 /* helper macro to deal with modulo size math: */
354 #define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw)
355
356 /* The rptr will (most likely) have moved past the IB to
357 * userspace cmdstream, so back up a bit, and then advance
358 * until we find a valid start of a packet.. this is going
359 * to be less reliable on a4xx and before (pkt0/pkt3),
360 * compared to pkt4/pkt7 with parity bits
361 */
362 const int lookback = 12;
363 unsigned rptr = mod_add(ringbuffers[id].rptr, -lookback);
364
365 for (int idx = 0; idx < lookback; idx++) {
366 if (valid_header(ringbuffers[id].buf[rptr]))
367 break;
368 rptr = mod_add(rptr, 1);
369 }
370
371 unsigned cmdszdw = mod_add(ringbuffers[id].wptr, -rptr);
372
373 printf("got cmdszdw=%d\n", cmdszdw);
374 uint32_t *buf = malloc(cmdszdw * 4);
375
376 for (int idx = 0; idx < cmdszdw; idx++) {
377 int p = mod_add(rptr, idx);
378 buf[idx] = ringbuffers[id].buf[p];
379 }
380
381 dump_commands(buf, cmdszdw, 0);
382 free(buf);
383 }
384 }
385
386 /*
387 * Decode 'bos' (buffers) section:
388 */
389
390 static void
decode_bos(void)391 decode_bos(void)
392 {
393 uint32_t size = 0;
394 uint64_t iova = 0;
395
396 foreach_line_in_section (line) {
397 if (startswith(line, " - iova:")) {
398 parseline(line, " - iova: %" PRIx64, &iova);
399 } else if (startswith(line, " size:")) {
400 parseline(line, " size: %u", &size);
401 } else if (startswith(line, " data: !!ascii85 |")) {
402 uint32_t *buf = popline_ascii85(size / 4);
403
404 if (verbose)
405 dump_hex_ascii(buf, size, 1);
406
407 add_buffer(iova, size, buf);
408
409 continue;
410 }
411
412 printf("%s", line);
413 }
414 }
415
416 /*
417 * Decode registers section:
418 */
419
420 static void
dump_register(struct rnn * rnn,uint32_t offset,uint32_t value)421 dump_register(struct rnn *rnn, uint32_t offset, uint32_t value)
422 {
423 struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset);
424 if (info && info->typeinfo) {
425 char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
426 printf("%s: %s\n", info->name, decoded);
427 } else if (info) {
428 printf("%s: %08x\n", info->name, value);
429 } else {
430 printf("<%04x>: %08x\n", offset, value);
431 }
432 }
433
434 static void
decode_gmu_registers(void)435 decode_gmu_registers(void)
436 {
437 foreach_line_in_section (line) {
438 uint32_t offset, value;
439 parseline(line, " - { offset: %x, value: %x }", &offset, &value);
440
441 printf("\t%08x\t", value);
442 dump_register(rnn_gmu, offset / 4, value);
443 }
444 }
445
446 static void
decode_registers(void)447 decode_registers(void)
448 {
449 foreach_line_in_section (line) {
450 uint32_t offset, value;
451 parseline(line, " - { offset: %x, value: %x }", &offset, &value);
452
453 reg_set(offset / 4, value);
454 printf("\t%08x", value);
455 dump_register_val(offset / 4, value, 0);
456 }
457 }
458
459 /* similar to registers section, but for banked context regs: */
460 static void
decode_clusters(void)461 decode_clusters(void)
462 {
463 foreach_line_in_section (line) {
464 if (startswith(line, " - cluster-name:") ||
465 startswith(line, " - context:")) {
466 printf("%s", line);
467 continue;
468 }
469
470 uint32_t offset, value;
471 parseline(line, " - { offset: %x, value: %x }", &offset, &value);
472
473 printf("\t%08x", value);
474 dump_register_val(offset / 4, value, 0);
475 }
476 }
477
478 /*
479 * Decode indexed-registers.. these aren't like normal registers, but a
480 * sort of FIFO where successive reads pop out associated debug state.
481 */
482
/* Print the CP_SQE_STAT dump: the first dword is shown as the PC, the next
 * 32 dwords as $01..$20 in two columns.  On a6xx, when the first of those
 * is a type7 packet header with a known opcode, the packet name is printed
 * too.  Reads 33 dwords from 'stat'.
 */
static void
dump_cp_sqe_stat(uint32_t *stat)
{
   printf("\t PC: %04x\n", stat[0]);

   const uint32_t *regs = &stat[1];

   /* Not sure if a valid header that is not type7 can happen here; such
    * a header is silently ignored.
    */
   if (is_a6xx() && valid_header(regs[0]) && pkt_is_type7(regs[0])) {
      unsigned opc = cp_type7_opcode(regs[0]);
      const char *name = pktname(opc);
      if (name)
         printf("\tPKT: %s\n", name);
   }

   for (int lo = 0; lo < 16; lo++) {
      int hi = lo + 16;

      printf("\t$%02x: %08x\t\t$%02x: %08x\n", lo + 1, regs[lo], hi + 1,
             regs[hi]);
   }
}
505
506 static void
dump_control_regs(uint32_t * regs)507 dump_control_regs(uint32_t *regs)
508 {
509 if (!rnn_control)
510 return;
511
512 /* Control regs 0x100-0x17f are a scratch space to be used by the
513 * firmware however it wants, unlike lower regs which involve some
514 * fixed-function units. Therefore only these registers get dumped
515 * directly.
516 */
517 for (uint32_t i = 0; i < 0x80; i++) {
518 printf("\t%08x\t", regs[i]);
519 dump_register(rnn_control, i + 0x100, regs[i]);
520 }
521 }
522
523 static void
dump_cp_ucode_dbg(uint32_t * dbg)524 dump_cp_ucode_dbg(uint32_t *dbg)
525 {
526 /* Notes on the data:
527 * There seems to be a section every 4096 DWORD's. The sections aren't
528 * all the same size, so the rest of the 4096 DWORD's are filled with
529 * mirrors of the actual data.
530 */
531
532 for (int section = 0; section < 6; section++, dbg += 0x1000) {
533 switch (section) {
534 case 0:
535 /* Contains scattered data from a630_sqe.fw: */
536 printf("\tSQE instruction cache:\n");
537 dump_hex_ascii(dbg, 4 * 0x400, 1);
538 break;
539 case 1:
540 printf("\tUnknown 1:\n");
541 dump_hex_ascii(dbg, 4 * 0x80, 1);
542 break;
543 case 2:
544 printf("\tUnknown 2:\n");
545 dump_hex_ascii(dbg, 4 * 0x200, 1);
546 break;
547 case 3:
548 printf("\tUnknown 3:\n");
549 dump_hex_ascii(dbg, 4 * 0x80, 1);
550 break;
551 case 4:
552 /* Don't bother printing this normally */
553 if (verbose) {
554 printf("\tSQE packet jumptable contents:\n");
555 dump_hex_ascii(dbg, 4 * 0x80, 1);
556 }
557 break;
558 case 5:
559 printf("\tSQE scratch control regs:\n");
560 dump_control_regs(dbg);
561 break;
562 }
563 }
564 }
565
566 static void
dump_mem_pool_reg_write(unsigned reg,uint32_t data,unsigned context,bool pipe)567 dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context,
568 bool pipe)
569 {
570 if (pipe) {
571 struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg);
572 printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
573
574 if (!strcmp(info->typeinfo->name, "void")) {
575 /* registers that ignore their payload */
576 } else {
577 printf("\t\t\t");
578 dump_register(rnn_pipe, reg, data);
579 }
580 } else {
581 printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
582 dump_register_val(reg, data, 2);
583 }
584 }
585
/* Decode one mem pool chunk (four dwords starting at 'chunk') and print the
 * register writes it holds.  A chunk has room for two writes, each with its
 * own enable bit, 32-bit payload, 18-bit register offset, pipe flag and
 * context bit.
 */
static void
dump_mem_pool_chunk(const uint32_t *chunk)
{
   /* Bit layout of a chunk; the field widths add up to 128 bits, matching
    * the 16 bytes copied in below.  The field order is significant.
    */
   struct __attribute__((packed)) {
      bool reg0_enabled : 1;
      bool reg1_enabled : 1;
      uint32_t data0 : 32;
      uint32_t data1 : 32;
      uint32_t reg0 : 18;
      uint32_t reg1 : 18;
      bool reg0_pipe : 1;
      bool reg1_pipe : 1;
      uint32_t reg0_context : 1;
      uint32_t reg1_context : 1;
      uint32_t padding : 22;
   } fields;

   /* copy rather than cast, so the dwords can be read through the
    * bitfield view
    */
   memcpy(&fields, chunk, 4 * sizeof(uint32_t));

   if (fields.reg0_enabled) {
      dump_mem_pool_reg_write(fields.reg0, fields.data0, fields.reg0_context,
                              fields.reg0_pipe);
   }

   if (fields.reg1_enabled) {
      dump_mem_pool_reg_write(fields.reg1, fields.data1, fields.reg1_context,
                              fields.reg1_pipe);
   }
}
615
/* Decode the CP_MEMPOOL dump.
 *
 * Hex-dumps the "data2" words, then for each of the 6 queues walks the
 * chunks from the reader position to the writer position, printing every
 * chunk raw and decoded, and finally compares the number of chunks walked
 * against the queue size recorded in the dump.  In verbose mode the
 * per-queue bookkeeping is printed as well.
 *
 * Exits with status 1 when a block index is outside of the pool.  Reads
 * 'mempool' at dword offsets up to 0x2000 + 0x60.
 */
static void
dump_cp_mem_pool(uint32_t *mempool)
{
   /* The mem pool is a shared pool of memory used for storing in-flight
    * register writes. There are 6 different queues, one for each
    * cluster. Writing to $data (or for some special registers, $addr)
    * pushes data onto the appropriate queue, and each queue is pulled
    * from by the appropriate cluster. The queues are thus written to
    * in-order, but may be read out-of-order.
    *
    * The queues are conceptually divided into 128-bit "chunks", and the
    * read and write pointers are in units of chunks.  These chunks are
    * organized internally into 8-chunk "blocks", and memory is allocated
    * dynamically in terms of blocks. Each queue is represented as a
    * singly-linked list of blocks, as well as 3-bit start/end chunk
    * pointers that point within the first/last block.  The next pointers
    * are located in a separate array, rather than inline.
    */

   /* TODO: The firmware CP_MEM_POOL save/restore routines do something
    * like:
    *
    * cread $02, [ $00 + 0 ]
    * and $02, $02, 0x118
    * ...
    * brne $02, 0, #label
    * mov $03, 0x2000
    * mov $03, 0x1000
    * label:
    * ...
    *
    * I think that control register 0 is the GPU version, and some
    * versions have a smaller mem pool. It seems some models have a mem
    * pool that's half the size, and a bunch of offsets are shifted
    * accordingly. Unfortunately the kernel driver's dumping code doesn't
    * seem to take this into account, even the downstream android driver,
    * and we don't know which versions 0x8, 0x10, or 0x100 correspond
    * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
    */
   bool small_mem_pool = false;

   /* The array of next pointers for each block. */
   const uint32_t *next_pointers =
      small_mem_pool ? &mempool[0x800] : &mempool[0x1000];

   /* Maximum number of blocks in the pool, also the size of the pointers
    * array.
    */
   const int num_blocks = small_mem_pool ? 0x30 : 0x80;

   /* Number of queues */
   const unsigned num_queues = 6;

   /* Unfortunately the per-queue state is a little more complicated than
    * a simple pair of begin/end pointers. Instead of a single beginning
    * block, there are *two*, with the property that either the two are
    * equal or the second is the "next" of the first. Similarly there are
    * two end blocks. Thus the queue either looks like this:
    *
    *    A -> B -> ... -> C -> D
    *
    * Or like this, or some combination:
    *
    *    A/B -> ... -> C/D
    *
    * However, there's only one beginning/end chunk offset. Now the
    * question is, which of A or B is the actual start? I.e. is the chunk
    * offset an offset inside A or B? It depends. I'll show a typical read
    * cycle, starting here (read pointer marked with a *) with a chunk
    * offset of 0:
    *
    *         A                    B
    *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
    * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
    *
    * Once the pointer advances far enough, the hardware decides to free
    * A, after which the read-side state looks like:
    *
    *      (free)                A/B
    *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
    * |_|_|_|_|_|_|_|_|    |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
    *
    * Then after advancing the pointer a bit more, the hardware fetches
    * the "next" pointer for A and stores it in B:
    *
    *      (free)                 A                    B
    *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
    * |_|_|_|_|_|_|_|_|    |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
    *
    * Then the read pointer advances into B, at which point we've come
    * back to the first state having advanced a whole block:
    *
    *      (free)                 A                    B
    *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
    * |_|_|_|_|_|_|_|_|    |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
    *
    *
    * There is a similar cycle for the write pointer. Now, the question
    * is, how do we know which state we're in? We need to know this to
    * know whether the pointer (*) is in A or B if they're different. It
    * seems like there should be some bit somewhere describing this, but
    * after lots of experimentation I've come up empty-handed. For now we
    * assume that if the pointer is in the first half, then we're in
    * either the first or second state and use B, and otherwise we're in
    * the second or third state and use A. So far I haven't seen anything
    * that violates this assumption.
    */

   /* Layout of the per-queue bookkeeping, copied out of the dump below.
    * Each group is padded to 8 dwords.
    */
   struct {
      uint32_t unk0;
      uint32_t padding0[7]; /* Mirrors of unk0 */

      struct {
         uint32_t chunk : 3;
         uint32_t first_block : 32 - 3;
      } writer[6];
      uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */

      uint32_t unk1;
      uint32_t padding2[7]; /* Mirrors of unk1 */

      uint32_t writer_second_block[6];
      uint32_t padding3[2];

      uint32_t unk2[6];
      uint32_t padding4[2];

      struct {
         uint32_t chunk : 3;
         uint32_t first_block : 32 - 3;
      } reader[6];
      uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */

      uint32_t unk3;
      uint32_t padding6[7]; /* Mirrors of unk3 */

      uint32_t reader_second_block[6];
      uint32_t padding7[2];

      uint32_t block_count[6];
      uint32_t padding[2];

      uint32_t unk4;
      uint32_t padding9[7]; /* Mirrors of unk4 */
   } data1;

   const uint32_t *data1_ptr =
      small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
   memcpy(&data1, data1_ptr, sizeof(data1));

   /* Based on the kernel, the first dword is the mem pool size (in
    * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
    */
   const uint32_t *data2_ptr =
      small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
   const int data2_size = 0x60;

   /* This seems to be the size of each queue in chunks. */
   const uint32_t *queue_sizes = &data2_ptr[0x18];

   printf("\tdata2:\n");
   dump_hex_ascii(data2_ptr, 4 * data2_size, 1);

   /* These seem to be some kind of counter of allocated/deallocated blocks */
   if (verbose) {
      printf("\tunk0: %x\n", data1.unk0);
      printf("\tunk1: %x\n", data1.unk1);
      printf("\tunk3: %x\n", data1.unk3);
      printf("\tunk4: %x\n\n", data1.unk4);
   }

   for (int queue = 0; queue < num_queues; queue++) {
      const char *cluster_names[6] = {"FE",   "SP_VS", "PC_VS",
                                      "GRAS", "SP_PS", "PS"};
      printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);

      if (verbose) {
         printf("\t\twriter_first_block: 0x%x\n",
                data1.writer[queue].first_block);
         printf("\t\twriter_second_block: 0x%x\n",
                data1.writer_second_block[queue]);
         printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
         printf("\t\treader_first_block: 0x%x\n",
                data1.reader[queue].first_block);
         printf("\t\treader_second_block: 0x%x\n",
                data1.reader_second_block[queue]);
         printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
         printf("\t\tblock_count: %d\n", data1.block_count[queue]);
         printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
         printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
      }

      /* Pick the start (reader) and end (writer) block using the
       * first-half/second-half heuristic described above: a chunk offset
       * above 3 selects the first block, otherwise the second block.
       */
      uint32_t cur_chunk = data1.reader[queue].chunk;
      uint32_t cur_block = cur_chunk > 3 ? data1.reader[queue].first_block
                                         : data1.reader_second_block[queue];
      uint32_t last_chunk = data1.writer[queue].chunk;
      uint32_t last_block = last_chunk > 3 ? data1.writer[queue].first_block
                                           : data1.writer_second_block[queue];

      if (verbose)
         printf("\tblock %x\n", cur_block);
      if (cur_block >= num_blocks) {
         fprintf(stderr, "block %x too large\n", cur_block);
         exit(1);
      }
      unsigned calculated_queue_size = 0;
      /* NOTE(review): the walk only ends when it reaches
       * last_block/last_chunk, and last_block itself is not range-checked;
       * confirm that a corrupt dump cannot make this loop forever.
       */
      while (cur_block != last_block || cur_chunk != last_chunk) {
         calculated_queue_size++;
         /* a block is 0x20 dwords: 8 chunks of 4 dwords each */
         uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];

         dump_mem_pool_chunk(chunk_ptr);

         /* NOTE(review): the offset printed here uses "cur_chunk + 4"
          * whereas chunk_ptr above uses "cur_chunk * 4" -- confirm which
          * one is intended.
          */
         printf("\t%05x: %08x %08x %08x %08x\n",
                4 * (cur_block * 0x20 + cur_chunk + 4), chunk_ptr[0],
                chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);

         cur_chunk++;
         if (cur_chunk == 8) {
            /* end of this block: follow the list to the next one */
            cur_block = next_pointers[cur_block];
            if (verbose)
               printf("\tblock %x\n", cur_block);
            if (cur_block >= num_blocks) {
               fprintf(stderr, "block %x too large\n", cur_block);
               exit(1);
            }
            cur_chunk = 0;
         }
      }
      if (calculated_queue_size != queue_sizes[queue]) {
         printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n",
                calculated_queue_size);
      }
      printf("\n");
   }
}
851
852 static void
decode_indexed_registers(void)853 decode_indexed_registers(void)
854 {
855 char *name = NULL;
856 uint32_t sizedwords = 0;
857
858 foreach_line_in_section (line) {
859 if (startswith(line, " - regs-name:")) {
860 free(name);
861 parseline(line, " - regs-name: %ms", &name);
862 } else if (startswith(line, " dwords:")) {
863 parseline(line, " dwords: %u", &sizedwords);
864 } else if (startswith(line, " data: !!ascii85 |")) {
865 uint32_t *buf = popline_ascii85(sizedwords);
866
867 /* some of the sections are pretty large, and are (at least
868 * so far) not useful, so skip them if not in verbose mode:
869 */
870 bool dump = verbose || !strcmp(name, "CP_SQE_STAT") ||
871 !strcmp(name, "CP_DRAW_STATE") ||
872 !strcmp(name, "CP_ROQ") || 0;
873
874 if (!strcmp(name, "CP_SQE_STAT"))
875 dump_cp_sqe_stat(buf);
876
877 if (!strcmp(name, "CP_UCODE_DBG_DATA"))
878 dump_cp_ucode_dbg(buf);
879
880 if (!strcmp(name, "CP_MEMPOOL"))
881 dump_cp_mem_pool(buf);
882
883 if (dump)
884 dump_hex_ascii(buf, 4 * sizedwords, 1);
885
886 free(buf);
887
888 continue;
889 }
890
891 printf("%s", line);
892 }
893 }
894
895 /*
896 * Decode shader-blocks:
897 */
898
899 static void
decode_shader_blocks(void)900 decode_shader_blocks(void)
901 {
902 char *type = NULL;
903 uint32_t sizedwords = 0;
904
905 foreach_line_in_section (line) {
906 if (startswith(line, " - type:")) {
907 free(type);
908 parseline(line, " - type: %ms", &type);
909 } else if (startswith(line, " size:")) {
910 parseline(line, " size: %u", &sizedwords);
911 } else if (startswith(line, " data: !!ascii85 |")) {
912 uint32_t *buf = popline_ascii85(sizedwords);
913
914 /* some of the sections are pretty large, and are (at least
915 * so far) not useful, so skip them if not in verbose mode:
916 */
917 bool dump = verbose || !strcmp(type, "A6XX_SP_INST_DATA") ||
918 !strcmp(type, "A6XX_HLSQ_INST_RAM") || 0;
919
920 if (!strcmp(type, "A6XX_SP_INST_DATA") ||
921 !strcmp(type, "A6XX_HLSQ_INST_RAM")) {
922 /* TODO this section actually contains multiple shaders
923 * (or parts of shaders?), so perhaps we should search
924 * for ends of shaders and decode each?
925 */
926 try_disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id);
927 }
928
929 if (dump)
930 dump_hex_ascii(buf, 4 * sizedwords, 1);
931
932 free(buf);
933
934 continue;
935 }
936
937 printf("%s", line);
938 }
939
940 free(type);
941 }
942
943 /*
944 * Decode debugbus section:
945 */
946
947 static void
decode_debugbus(void)948 decode_debugbus(void)
949 {
950 char *block = NULL;
951 uint32_t sizedwords = 0;
952
953 foreach_line_in_section (line) {
954 if (startswith(line, " - debugbus-block:")) {
955 free(block);
956 parseline(line, " - debugbus-block: %ms", &block);
957 } else if (startswith(line, " count:")) {
958 parseline(line, " count: %u", &sizedwords);
959 } else if (startswith(line, " data: !!ascii85 |")) {
960 uint32_t *buf = popline_ascii85(sizedwords);
961
962 /* some of the sections are pretty large, and are (at least
963 * so far) not useful, so skip them if not in verbose mode:
964 */
965 bool dump = verbose || 0;
966
967 if (dump)
968 dump_hex_ascii(buf, 4 * sizedwords, 1);
969
970 free(buf);
971
972 continue;
973 }
974
975 printf("%s", line);
976 }
977 }
978
979 /*
980 * Main crashdump decode loop:
981 */
982
983 static void
decode(void)984 decode(void)
985 {
986 const char *line;
987
988 while ((line = popline())) {
989 printf("%s", line);
990 if (startswith(line, "revision:")) {
991 parseline(line, "revision: %u", &options.gpu_id);
992 printf("Got gpu_id=%u\n", options.gpu_id);
993
994 cffdec_init(&options);
995
996 if (is_a6xx()) {
997 rnn_gmu = rnn_new(!options.color);
998 rnn_load_file(rnn_gmu, "adreno/a6xx_gmu.xml", "A6XX");
999 rnn_control = rnn_new(!options.color);
1000 rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml",
1001 "A6XX_CONTROL_REG");
1002 rnn_pipe = rnn_new(!options.color);
1003 rnn_load_file(rnn_pipe, "adreno/adreno_pipe_regs.xml",
1004 "A6XX_PIPE_REG");
1005 } else if (is_a5xx()) {
1006 rnn_control = rnn_new(!options.color);
1007 rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml",
1008 "A5XX_CONTROL_REG");
1009 } else {
1010 rnn_control = NULL;
1011 }
1012 } else if (startswith(line, "bos:")) {
1013 decode_bos();
1014 } else if (startswith(line, "ringbuffer:")) {
1015 decode_ringbuffer();
1016 } else if (startswith(line, "registers:")) {
1017 decode_registers();
1018
1019 /* after we've recorded buffer contents, and CP register values,
1020 * we can take a stab at decoding the cmdstream:
1021 */
1022 dump_cmdstream();
1023 } else if (startswith(line, "registers-gmu:")) {
1024 decode_gmu_registers();
1025 } else if (startswith(line, "indexed-registers:")) {
1026 decode_indexed_registers();
1027 } else if (startswith(line, "shader-blocks:")) {
1028 decode_shader_blocks();
1029 } else if (startswith(line, "clusters:")) {
1030 decode_clusters();
1031 } else if (startswith(line, "debugbus:")) {
1032 decode_debugbus();
1033 }
1034 }
1035 }
1036
1037 /*
1038 * Usage and argument parsing:
1039 */
1040
/* Print the command line help to stderr and exit with status 2. */
static void
usage(void)
{
   /* clang-format off */
   static const char help[] =
      "Usage:\n\n"
      "\tcrashdec [-achmsv] [-f FILE]\n\n"
      "Options:\n"
      "\t-a, --allregs - show all registers (including ones not written since\n"
      "\t previous draw) at each draw\n"
      "\t-c, --color - use colors\n"
      "\t-f, --file=FILE - read input from specified file (rather than stdin)\n"
      "\t-h, --help - this usage message\n"
      "\t-m, --markers - try to decode CP_NOP string markers\n"
      "\t-s, --summary - don't show individual register writes, but just show\n"
      "\t register values on draws\n"
      "\t-v, --verbose - dump more verbose output, including contents of\n"
      "\t less interesting buffers\n"
      "\n";
   /* clang-format on */

   fputs(help, stderr);
   exit(2);
}
1063
/* clang-format off */
/* Long options accepted by getopt_long() in main(); each one maps to the
 * short option given in its last field.  Only "file" takes an argument.
 * The empty entry terminates the table.
 */
static const struct option opts[] = {
   { .name = "allregs", .has_arg = 0, NULL, 'a' },
   { .name = "color",   .has_arg = 0, NULL, 'c' },
   { .name = "file",    .has_arg = 1, NULL, 'f' },
   { .name = "help",    .has_arg = 0, NULL, 'h' },
   { .name = "markers", .has_arg = 0, NULL, 'm' },
   { .name = "summary", .has_arg = 0, NULL, 's' },
   { .name = "verbose", .has_arg = 0, NULL, 'v' },
   {}
};
/* clang-format on */
1076
/* True when stdout is a terminal; main() then routes output via the pager. */
static bool interactive;

/* Flush pending output and, when the pager was opened, close it.
 * Registered with atexit() by main().
 */
static void
cleanup(void)
{
   fflush(stdout);

   if (!interactive)
      return;

   pager_close();
}
1088
1089 int
main(int argc,char ** argv)1090 main(int argc, char **argv)
1091 {
1092 int c;
1093
1094 interactive = isatty(STDOUT_FILENO);
1095 options.color = interactive;
1096
1097 /* default to read from stdin: */
1098 in = stdin;
1099
1100 while ((c = getopt_long(argc, argv, "acf:hmsv", opts, NULL)) != -1) {
1101 switch (c) {
1102 case 'a':
1103 options.allregs = true;
1104 break;
1105 case 'c':
1106 options.color = true;
1107 break;
1108 case 'f':
1109 in = fopen(optarg, "r");
1110 break;
1111 case 'm':
1112 options.decode_markers = true;
1113 break;
1114 case 's':
1115 options.summary = true;
1116 break;
1117 case 'v':
1118 verbose = true;
1119 break;
1120 case 'h':
1121 default:
1122 usage();
1123 }
1124 }
1125
1126 disasm_a3xx_set_debug(PRINT_RAW);
1127
1128 if (interactive) {
1129 pager_open();
1130 }
1131
1132 atexit(cleanup);
1133
1134 decode();
1135 cleanup();
1136 }
1137