1 /*
2  * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <err.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <inttypes.h>
30 #include <signal.h>
31 #include <stdarg.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <sys/stat.h>
39 #include <sys/types.h>
40 #include <sys/wait.h>
41 
42 #include "freedreno_pm4.h"
43 
44 #include "buffers.h"
45 #include "cffdec.h"
46 #include "disasm.h"
47 #include "redump.h"
48 #include "rnnutil.h"
49 #include "script.h"
50 
51 /* ************************************************************************* */
52 /* originally based on kernel recovery dump code: */
53 
/* Decoder configuration; pointed at the caller's struct by cffdec_init(). */
static const struct cffdec_options *options;

/* When set, dump_registers() reports writes to non-banked registers. */
static bool needs_wfi;
/* Copy of options->summary taken by cffdec_init(); consulted by quiet(). */
static bool summary;
/* The TEX_SAMP/TEX_CONST register hooks do nothing unless this is set. */
static bool in_summary;
/* NOTE(review): presumably a running vertex count -- confirm against users. */
static int vertices;
60 
61 static inline unsigned
regcnt(void)62 regcnt(void)
63 {
64    if (options->gpu_id >= 500)
65       return 0xffff;
66    else
67       return 0x7fff;
68 }
69 
70 static int
is_64b(void)71 is_64b(void)
72 {
73    return options->gpu_id >= 500;
74 }
75 
/* NOTE(review): draws[] is not referenced in the visible part of this file. */
static int draws[4];

/* Per-IB-level decode state, indexed by `ib`. */
static struct {
   uint64_t base; /* start address of the buffer at this IB level */
   uint32_t size; /* in dwords */
   /* A cmdstream generally consists of multiple IB calls into different
    * buffers, and those buffers are often re-used for each tile.  The
    * triggered flag helps to make clear which part of the cmdstream is
    * before and which part is after the GPU hang:
    *
    * 1) if, in IB2, we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not yet past the point within its own
    *    buffer where the GPU hung, then we know the hang happens on a
    *    future use of that IB2 buffer.
    *
    * 2) if we are in an IB1 or IB2 buffer that is not the one where the
    *    GPU hung, but we have already passed the trigger point at the
    *    same IB level, then we know we are past the point where the GPU
    *    hung.
    *
    * So this is a one-way switch, false->true, and a higher-numbered IB
    * level is not considered triggered unless the lower-numbered IB
    * level is.
    */
   bool triggered;
} ibs[4];

/* Index of the IB level currently being decoded. */
static int ib;

/* draw_count is zeroed by cffdec_init(); current_draw_count is what
 * quiet() compares against options->draw_filter.
 */
static int draw_count;
static int current_draw_count;
105 
/* Query mode: register queries may be given by symbolic name, so parsing
 * of the query strings is deferred until the gpu_id is known and the rnn
 * database has been loaded (see init_rnn()).  One entry per query string.
 */
static int *queryvals;
111 
112 static bool
quiet(int lvl)113 quiet(int lvl)
114 {
115    if ((options->draw_filter != -1) &&
116        (options->draw_filter != current_draw_count))
117       return true;
118    if ((lvl >= 3) && (summary || options->querystrs || options->script))
119       return true;
120    if ((lvl >= 2) && (options->querystrs || options->script))
121       return true;
122    return false;
123 }
124 
/* printf() to stdout, unless quiet(lvl) says output at this level is
 * currently suppressed.
 */
void
printl(int lvl, const char *fmt, ...)
{
   if (!quiet(lvl)) {
      va_list ap;

      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
   }
}
135 
/* Indentation prefix for each nesting level: level N gets N+1 tabs for
 * levels 0..8, and the remaining entries hold an "x" marker instead.
 */
static const char *levels[] = {
   [0] = "\t",
   [1] = "\t\t",
   [2] = "\t\t\t",
   [3] = "\t\t\t\t",
   [4] = "\t\t\t\t\t",
   [5] = "\t\t\t\t\t\t",
   [6] = "\t\t\t\t\t\t\t",
   [7] = "\t\t\t\t\t\t\t\t",
   [8] = "\t\t\t\t\t\t\t\t\t",
   [9] = "x",
   [10] = "x",
   [11] = "x",
   [12] = "x",
   [13] = "x",
   [14] = "x",
};
153 
/* Source of a block of state, as passed to dump_tex_samp(). */
enum state_src_t {
   STATE_SRC_DIRECT = 0,
   STATE_SRC_INDIRECT = 1,
   STATE_SRC_BINDLESS = 2,
};
159 
/* Forward declarations for the SDS (CP_SET_DRAW_STATE) helpers: */
static void load_all_groups(int level);
static void disable_all_groups(void);

/* Forward declarations for the texture state dumpers used by the
 * TEX_SAMP/TEX_CONST register hooks:
 */
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src,
                          int num_unit, int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
167 
168 static bool
highlight_gpuaddr(uint64_t gpuaddr)169 highlight_gpuaddr(uint64_t gpuaddr)
170 {
171    if (!options->ibs[ib].base)
172       return false;
173 
174    if ((ib > 0) && options->ibs[ib - 1].base && !ibs[ib - 1].triggered)
175       return false;
176 
177    if (ibs[ib].triggered)
178       return options->color;
179 
180    if (options->ibs[ib].base != ibs[ib].base)
181       return false;
182 
183    uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
184    uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
185 
186    bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
187 
188    ibs[ib].triggered |= triggered;
189 
190    if (triggered)
191       printf("ESTIMATED CRASH LOCATION!\n");
192 
193    return triggered & options->color;
194 }
195 
196 static void
dump_hex(uint32_t * dwords,uint32_t sizedwords,int level)197 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
198 {
199    int i, j;
200    int lastzero = 1;
201 
202    if (quiet(2))
203       return;
204 
205    for (i = 0; i < sizedwords; i += 8) {
206       int zero = 1;
207 
208       /* always show first row: */
209       if (i == 0)
210          zero = 0;
211 
212       for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
213          if (dwords[i + j])
214             zero = 0;
215 
216       if (zero && !lastzero)
217          printf("*\n");
218 
219       lastzero = zero;
220 
221       if (zero)
222          continue;
223 
224       uint64_t addr = gpuaddr(&dwords[i]);
225       bool highlight = highlight_gpuaddr(addr);
226 
227       if (highlight)
228          printf("\x1b[0;1;31m");
229 
230       if (is_64b()) {
231          printf("%016" PRIx64 ":%s", addr, levels[level]);
232       } else {
233          printf("%08x:%s", (uint32_t)addr, levels[level]);
234       }
235 
236       if (highlight)
237          printf("\x1b[0m");
238 
239       printf("%04x:", i * 4);
240 
241       for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
242          printf(" %08x", dwords[i + j]);
243       }
244 
245       printf("\n");
246    }
247 }
248 
249 static void
dump_float(float * dwords,uint32_t sizedwords,int level)250 dump_float(float *dwords, uint32_t sizedwords, int level)
251 {
252    int i;
253    for (i = 0; i < sizedwords; i++) {
254       if ((i % 8) == 0) {
255          if (is_64b()) {
256             printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
257          } else {
258             printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
259          }
260       } else {
261          printf(" ");
262       }
263       printf("%8f", *(dwords++));
264       if ((i % 8) == 7)
265          printf("\n");
266    }
267    if (i % 8)
268       printf("\n");
269 }
270 
/* I believe the surface format is in the low bits:
#define RB_COLOR_INFO__COLOR_FORMAT_MASK                   0x0000000fL
Comments in sys2gmem_tex_const indicate that the address is [31:12], but
it looks like at least some of the bits above the format have a different
meaning..

Splits `dword` into an address part (the bits outside `mask`) and a flags
part (the bits inside `mask`).
*/
static void
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
                 uint32_t mask)
{
   assert(!is_64b()); /* this is only used on a2xx */

   const uint32_t addr_bits = dword & ~mask;
   const uint32_t flag_bits = dword & mask;

   *gpuaddr = addr_bits;
   *flags = flag_bits;
}
284 
285 static uint32_t type0_reg_vals[0xffff + 1];
286 static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
287                                    8]; /* written since last draw */
288 static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];
289 static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
290 
291 static bool
reg_rewritten(uint32_t regbase)292 reg_rewritten(uint32_t regbase)
293 {
294    return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
295 }
296 
297 bool
reg_written(uint32_t regbase)298 reg_written(uint32_t regbase)
299 {
300    return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
301 }
302 
303 static void
clear_rewritten(void)304 clear_rewritten(void)
305 {
306    memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
307 }
308 
309 static void
clear_written(void)310 clear_written(void)
311 {
312    memset(type0_reg_written, 0, sizeof(type0_reg_written));
313    clear_rewritten();
314 }
315 
316 uint32_t
reg_lastval(uint32_t regbase)317 reg_lastval(uint32_t regbase)
318 {
319    return lastvals[regbase];
320 }
321 
322 static void
clear_lastvals(void)323 clear_lastvals(void)
324 {
325    memset(lastvals, 0, sizeof(lastvals));
326 }
327 
328 uint32_t
reg_val(uint32_t regbase)329 reg_val(uint32_t regbase)
330 {
331    return type0_reg_vals[regbase];
332 }
333 
334 void
reg_set(uint32_t regbase,uint32_t val)335 reg_set(uint32_t regbase, uint32_t val)
336 {
337    assert(regbase < regcnt());
338    type0_reg_vals[regbase] = val;
339    type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
340    type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
341 }
342 
343 static void
reg_dump_scratch(const char * name,uint32_t dword,int level)344 reg_dump_scratch(const char *name, uint32_t dword, int level)
345 {
346    unsigned r;
347 
348    if (quiet(3))
349       return;
350 
351    r = regbase("CP_SCRATCH[0].REG");
352 
353    // if not, try old a2xx/a3xx version:
354    if (!r)
355       r = regbase("CP_SCRATCH_REG0");
356 
357    if (!r)
358       return;
359 
360    printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
361           reg_val(r + 6), reg_val(r + 7));
362 }
363 
/* Hex-dump `sizedwords` dwords of the buffer at `gpuaddr`, one level
 * deeper than `level`.  Does nothing when quiet(quietlvl) is in effect or
 * when hostptr() has no mapping for the address.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
   if (quiet(quietlvl))
      return;

   void *host = hostptr(gpuaddr);
   if (!host)
      return;

   dump_hex(host, sizedwords, level + 1);
}
377 
/* Hex-dump the first 64 dwords of the buffer at `gpuaddr`, at quiet
 * level 3.
 */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
   const int dump_dwords = 64;
   const int quiet_level = 3;

   dump_gpuaddr_size(gpuaddr, level, dump_dwords, quiet_level);
}
383 
/* Register hook: treat the 32-bit register value as a GPU address and
 * dump the buffer it points at.  `name` is not used.
 */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   dump_gpuaddr(addr, level);
}
389 
/* Low 32 bits of a 64-bit GPU address, latched by the _LO register hook
 * until the matching _HI register hook combines it with the high half.
 */
uint32_t gpuaddr_lo;

/* Register hook for the _LO half of an address pair: just remember the
 * value.  `name` and `level` are not used.
 */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
   (void)name;
   (void)level;
   gpuaddr_lo = dword;
}
396 
397 static void
reg_dump_gpuaddr_hi(const char * name,uint32_t dword,int level)398 reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
399 {
400    dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
401 }
402 
/* 64-bit register hook: treat `qword` as a GPU address and dump the
 * buffer it points at.  `name` is not used.
 */
static void
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   dump_gpuaddr(addr, level);
}
408 
409 static void
dump_shader(const char * ext,void * buf,int bufsz)410 dump_shader(const char *ext, void *buf, int bufsz)
411 {
412    if (options->dump_shaders) {
413       static int n = 0;
414       char filename[16];
415       int fd;
416       sprintf(filename, "%04d.%s", n++, ext);
417       fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
418       if (fd != -1) {
419          write(fd, buf, bufsz);
420          close(fd);
421       }
422    }
423 }
424 
425 static void
disasm_gpuaddr(const char * name,uint64_t gpuaddr,int level)426 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
427 {
428    void *buf;
429 
430    gpuaddr &= 0xfffffffffffffff0;
431 
432    if (quiet(3))
433       return;
434 
435    buf = hostptr(gpuaddr);
436    if (buf) {
437       uint32_t sizedwords = hostlen(gpuaddr) / 4;
438       const char *ext;
439 
440       dump_hex(buf, min(64, sizedwords), level + 1);
441       try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->gpu_id);
442 
443       /* this is a bit ugly way, but oh well.. */
444       if (strstr(name, "SP_VS_OBJ")) {
445          ext = "vo3";
446       } else if (strstr(name, "SP_FS_OBJ")) {
447          ext = "fo3";
448       } else if (strstr(name, "SP_GS_OBJ")) {
449          ext = "go3";
450       } else if (strstr(name, "SP_CS_OBJ")) {
451          ext = "co3";
452       } else {
453          ext = NULL;
454       }
455 
456       if (ext)
457          dump_shader(ext, buf, sizedwords * 4);
458    }
459 }
460 
/* Register hook: treat the 32-bit register value as the GPU address of a
 * shader and disassemble it.
 */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   disasm_gpuaddr(name, addr, level);
}
466 
467 static void
reg_disasm_gpuaddr_hi(const char * name,uint32_t dword,int level)468 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
469 {
470    disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
471 }
472 
/* 64-bit register hook: treat `qword` as the GPU address of a shader and
 * disassemble it.
 */
static void
reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   disasm_gpuaddr(name, addr, level);
}
478 
/* Look up the current value of the TEX_COUNT register that belongs to the
 * named TEX_SAMP/TEX_CONST register.
 *
 * The COUNT register name is formed by replacing everything from the
 * first "CONST" (or, failing that, the first "SAMP") in `name` with
 * "COUNT".  Returns 0 when `name` contains neither.
 *
 * Note that this more or less assumes an equal number of samplers and
 * textures; it is not clear that there is a much better option.  On a6xx
 * the bitfields in SP_xS_CONFIG could perhaps be decoded instead.
 */
static int
get_tex_count(const char *name)
{
   static const char count_suffix[] = "COUNT";
   char count_reg[strlen(name) + 5];

   const char *kind = strstr(name, "CONST");
   if (!kind)
      kind = strstr(name, "SAMP");
   if (!kind)
      return 0;

   /* keep the prefix before CONST/SAMP, then append COUNT (with its NUL) */
   const int prefix_len = kind - name;
   memcpy(count_reg, name, prefix_len);
   memcpy(count_reg + prefix_len, count_suffix, sizeof(count_suffix));

   return reg_val(regbase(count_reg));
}
504 
505 static void
reg_dump_tex_samp_hi(const char * name,uint32_t dword,int level)506 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
507 {
508    if (!in_summary)
509       return;
510 
511    int num_unit = get_tex_count(name);
512    uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
513    void *buf = hostptr(gpuaddr);
514 
515    if (!buf)
516       return;
517 
518    dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
519 }
520 
521 static void
reg_dump_tex_const_hi(const char * name,uint32_t dword,int level)522 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
523 {
524    if (!in_summary)
525       return;
526 
527    int num_unit = get_tex_count(name);
528    uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
529    void *buf = hostptr(gpuaddr);
530 
531    if (!buf)
532       return;
533 
534    dump_tex_const(buf, num_unit, level + 1);
535 }
536 
537 /*
538  * Registers with special handling (rnndec_decode() handles rest):
539  */
540 #define REG(x, fxn)    { #x, fxn }
541 #define REG64(x, fxn)  { #x, .fxn64 = fxn, .is_reg64 = true }
542 static struct {
543    const char *regname;
544    void (*fxn)(const char *name, uint32_t dword, int level);
545    void (*fxn64)(const char *name, uint64_t qword, int level);
546    uint32_t regbase;
547    bool is_reg64;
548 } reg_a2xx[] = {
549       REG(CP_SCRATCH_REG0, reg_dump_scratch),
550       REG(CP_SCRATCH_REG1, reg_dump_scratch),
551       REG(CP_SCRATCH_REG2, reg_dump_scratch),
552       REG(CP_SCRATCH_REG3, reg_dump_scratch),
553       REG(CP_SCRATCH_REG4, reg_dump_scratch),
554       REG(CP_SCRATCH_REG5, reg_dump_scratch),
555       REG(CP_SCRATCH_REG6, reg_dump_scratch),
556       REG(CP_SCRATCH_REG7, reg_dump_scratch),
557       {NULL},
558 }, reg_a3xx[] = {
559       REG(CP_SCRATCH_REG0, reg_dump_scratch),
560       REG(CP_SCRATCH_REG1, reg_dump_scratch),
561       REG(CP_SCRATCH_REG2, reg_dump_scratch),
562       REG(CP_SCRATCH_REG3, reg_dump_scratch),
563       REG(CP_SCRATCH_REG4, reg_dump_scratch),
564       REG(CP_SCRATCH_REG5, reg_dump_scratch),
565       REG(CP_SCRATCH_REG6, reg_dump_scratch),
566       REG(CP_SCRATCH_REG7, reg_dump_scratch),
567       REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
568       REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
569       REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
570       REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
571       REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
572       REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
573       {NULL},
574 }, reg_a4xx[] = {
575       REG(CP_SCRATCH[0].REG, reg_dump_scratch),
576       REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
577       REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
578       REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
579       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
580       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
581       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
582       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
583       REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
584       REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
585       REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
586       REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
587       REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
588       REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
589       REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
590       REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
591       REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
592       REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
593       REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
594       REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
595       REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
596       REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
597       REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
598       REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
599       REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
600       {NULL},
601 }, reg_a5xx[] = {
602       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
603       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
604       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
605       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
606       REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
607       REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
608       REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
609       REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
610       REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
611       REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
612       REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
613       REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
614       REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
615       REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
616       REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
617       REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
618       REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
619       REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
620       REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
621       REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
622       REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
623       REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
624       REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
625       REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
626       REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
627       REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
628       REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
629       REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
630       REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
631       REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
632       REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
633       REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
634       REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
635       REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
636       REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
637       REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
638       REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
639       REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
640       REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
641       REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
642       REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
643       REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
644 //      REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
645 //      REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
646 //      REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
647 //      REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
648 //      REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
649 //      REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
650 //      REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
651 //      REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
652 //      REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
653 //      REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
654 //      REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
655 //      REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
656 //      REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
657 //      REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
658 //      REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
659 //      REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
660 //      REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
661 //      REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
662 //      REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
663 //      REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
664 //      REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
665 //      REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
666 //      REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
667 //      REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
668 //      REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
669 //      REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
670 
671 //      REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
672 //      REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
673 //      REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
674 //      REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
675 //      REG(RB_2D_DST_LO, reg_gpuaddr_lo),
676 //      REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
677 //      REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
678 //      REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
679 
680       {NULL},
681 }, reg_a6xx[] = {
682       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
683       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
684       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
685       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
686 
687       REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
688       REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
689       REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
690       REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
691       REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
692       REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
693 
694       REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
695       REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
696       REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
697       REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
698       REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
699       REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
700       REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
701       REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
702       REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
703       REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
704       REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
705       REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),
706 
707       {NULL},
708 }, *type0_reg;
709 
710 static struct rnn *rnn;
711 
712 static void
init_rnn(const char * gpuname)713 init_rnn(const char *gpuname)
714 {
715    rnn = rnn_new(!options->color);
716 
717    rnn_load(rnn, gpuname);
718 
719    if (options->querystrs) {
720       int i;
721       queryvals = calloc(options->nquery, sizeof(queryvals[0]));
722 
723       for (i = 0; i < options->nquery; i++) {
724          int val = strtol(options->querystrs[i], NULL, 0);
725 
726          if (val == 0)
727             val = regbase(options->querystrs[i]);
728 
729          queryvals[i] = val;
730          printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
731       }
732    }
733 
734    for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
735       type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
736       if (!type0_reg[idx].regbase) {
737          printf("invalid register name: %s\n", type0_reg[idx].regname);
738          exit(1);
739       }
740    }
741 }
742 
743 void
reset_regs(void)744 reset_regs(void)
745 {
746    clear_written();
747    clear_lastvals();
748    memset(&ibs, 0, sizeof(ibs));
749 }
750 
751 void
cffdec_init(const struct cffdec_options * _options)752 cffdec_init(const struct cffdec_options *_options)
753 {
754    options = _options;
755    summary = options->summary;
756 
757    /* in case we're decoding multiple files: */
758    free(queryvals);
759    reset_regs();
760    draw_count = 0;
761 
762    /* TODO we need an API to free/cleanup any previous rnn */
763 
764    switch (options->gpu_id) {
765    case 200 ... 299:
766       type0_reg = reg_a2xx;
767       init_rnn("a2xx");
768       break;
769    case 300 ... 399:
770       type0_reg = reg_a3xx;
771       init_rnn("a3xx");
772       break;
773    case 400 ... 499:
774       type0_reg = reg_a4xx;
775       init_rnn("a4xx");
776       break;
777    case 500 ... 599:
778       type0_reg = reg_a5xx;
779       init_rnn("a5xx");
780       break;
781    case 600 ... 699:
782       type0_reg = reg_a6xx;
783       init_rnn("a6xx");
784       break;
785    default:
786       errx(-1, "unsupported gpu");
787    }
788 }
789 
790 const char *
pktname(unsigned opc)791 pktname(unsigned opc)
792 {
793    return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
794 }
795 
796 const char *
regname(uint32_t regbase,int color)797 regname(uint32_t regbase, int color)
798 {
799    return rnn_regname(rnn, regbase, color);
800 }
801 
802 uint32_t
regbase(const char * name)803 regbase(const char *name)
804 {
805    return rnn_regbase(rnn, name);
806 }
807 
/* Return non-zero if the (uncolored) name of register `regbase` ends with
 * `suffix`.
 *
 * The tail of the name is compared directly, so a name that also contains
 * the suffix somewhere earlier (e.g. "A_HI_B_HI" with "_HI") is handled
 * correctly, and no pointer is ever formed outside the name string.
 * Returns 0 if no name is available for the register.
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
   const char *name = regname(regbase, 0);

   /* NOTE(review): defensive; confirm whether regname() can return NULL */
   if (!name)
      return 0;

   const size_t name_len = strlen(name);
   const size_t suffix_len = strlen(suffix);

   if (suffix_len > name_len)
      return 0;

   return strcmp(name + (name_len - suffix_len), suffix) == 0;
}
817 
818 void
dump_register_val(uint32_t regbase,uint32_t dword,int level)819 dump_register_val(uint32_t regbase, uint32_t dword, int level)
820 {
821    struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
822 
823    if (info && info->typeinfo) {
824       uint64_t gpuaddr = 0;
825       char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
826       printf("%s%s: %s", levels[level], info->name, decoded);
827 
828       /* Try and figure out if we are looking at a gpuaddr.. this
829        * might be useful for other gen's too, but at least a5xx has
830        * the _HI/_LO suffix we can look for.  Maybe a better approach
831        * would be some special annotation in the xml..
832        * for a6xx use "address" and "waddress" types
833        */
834       if (options->gpu_id >= 600) {
835          if (!strcmp(info->typeinfo->name, "address") ||
836              !strcmp(info->typeinfo->name, "waddress")) {
837             gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
838          }
839       } else if (options->gpu_id >= 500) {
840          if (endswith(regbase, "_HI") && endswith(regbase - 1, "_LO")) {
841             gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase - 1);
842          } else if (endswith(regbase, "_LO") && endswith(regbase + 1, "_HI")) {
843             gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
844          }
845       }
846 
847       if (gpuaddr && hostptr(gpuaddr)) {
848          printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
849                 gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
850                 hostlen(gpubaseaddr(gpuaddr)));
851       }
852 
853       printf("\n");
854 
855       free(decoded);
856    } else if (info) {
857       printf("%s%s: %08x\n", levels[level], info->name, dword);
858    } else {
859       printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
860    }
861 
862    if (info) {
863       free(info->name);
864       free(info);
865    }
866 }
867 
868 static void
dump_register(uint32_t regbase,uint32_t dword,int level)869 dump_register(uint32_t regbase, uint32_t dword, int level)
870 {
871    if (!quiet(3)) {
872       dump_register_val(regbase, dword, level);
873    }
874 
875    for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
876       if (type0_reg[idx].regbase == regbase) {
877          if (type0_reg[idx].is_reg64) {
878             uint64_t qword = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
879             type0_reg[idx].fxn64(type0_reg[idx].regname, qword, level);
880          } else {
881             type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
882          }
883          break;
884       }
885    }
886 }
887 
/* True for register offsets in the banked range 0x2000..0x23ff. */
static bool
is_banked_reg(uint32_t regbase)
{
   const uint32_t first_banked = 0x2000;
   const uint32_t last_banked = 0x23ff;

   return (regbase >= first_banked) && (regbase <= last_banked);
}
893 
894 static void
dump_registers(uint32_t regbase,uint32_t * dwords,uint32_t sizedwords,int level)895 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
896                int level)
897 {
898    while (sizedwords--) {
899       int last_summary = summary;
900 
901       /* access to non-banked registers needs a WFI:
902        * TODO banked register range for a2xx??
903        */
904       if (needs_wfi && !is_banked_reg(regbase))
905          printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
906 
907       reg_set(regbase, *dwords);
908       dump_register(regbase, *dwords, level);
909       regbase++;
910       dwords++;
911       summary = last_summary;
912    }
913 }
914 
915 static void
dump_domain(uint32_t * dwords,uint32_t sizedwords,int level,const char * name)916 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
917 {
918    struct rnndomain *dom;
919    int i;
920 
921    dom = rnn_finddomain(rnn->db, name);
922 
923    if (!dom)
924       return;
925 
926    if (script_packet)
927       script_packet(dwords, sizedwords, rnn, dom);
928 
929    if (quiet(2))
930       return;
931 
932    for (i = 0; i < sizedwords; i++) {
933       struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
934       char *decoded;
935       if (!(info && info->typeinfo))
936          break;
937       uint64_t value = dwords[i];
938       if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
939          value |= (uint64_t)dwords[i + 1] << 32;
940          i++; /* skip the next dword since we're printing it now */
941       }
942       decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
943       /* Unlike the register printing path, we don't print the name
944        * of the register, so if it doesn't contain other named
945        * things (i.e. it isn't a bitset) then print the register
946        * name as if it's a bitset with a single entry. This avoids
947        * having to create a dummy register with a single entry to
948        * get a name in the decoding.
949        */
950       if (info->typeinfo->type == RNN_TTYPE_BITSET ||
951           info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
952          printf("%s%s\n", levels[level], decoded);
953       } else {
954          printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
955                 info->name, rnn->vc->colors->reset, decoded);
956       }
957       free(decoded);
958       free(info->name);
959       free(info);
960    }
961 }
962 
/* Bounds of the current bin, as printed in the per-draw query line. */
static uint32_t bin_x1;
static uint32_t bin_x2;
static uint32_t bin_y1;
static uint32_t bin_y2;

static unsigned mode;

/* Human readable label of the current render mode (see print_mode()). */
static const char *render_mode;

/* Bitmask of the render modes currently enabled; starts out as all modes. */
static enum {
   MODE_BINNING = 1 << 0,
   MODE_GMEM = 1 << 1,
   MODE_BYPASS = 1 << 2,
   MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;

/* skip_ib2 flags, reported by print_mode() as g= (global) and l= (local). */
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
974 
975 static void
print_mode(int level)976 print_mode(int level)
977 {
978    if ((options->gpu_id >= 500) && !quiet(2)) {
979       printf("%smode: %s\n", levels[level], render_mode);
980       printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
981              skip_ib2_enable_local);
982    }
983 }
984 
985 static bool
skip_query(void)986 skip_query(void)
987 {
988    switch (options->query_mode) {
989    case QUERY_ALL:
990       /* never skip: */
991       return false;
992    case QUERY_WRITTEN:
993       for (int i = 0; i < options->nquery; i++) {
994          uint32_t regbase = queryvals[i];
995          if (!reg_written(regbase)) {
996             continue;
997          }
998          if (reg_rewritten(regbase)) {
999             return false;
1000          }
1001       }
1002       return true;
1003    case QUERY_DELTA:
1004       for (int i = 0; i < options->nquery; i++) {
1005          uint32_t regbase = queryvals[i];
1006          if (!reg_written(regbase)) {
1007             continue;
1008          }
1009          uint32_t lastval = reg_val(regbase);
1010          if (lastval != lastvals[regbase]) {
1011             return false;
1012          }
1013       }
1014       return true;
1015    }
1016    return true;
1017 }
1018 
1019 static void
__do_query(const char * primtype,uint32_t num_indices)1020 __do_query(const char *primtype, uint32_t num_indices)
1021 {
1022    int n = 0;
1023 
1024    if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
1025       uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1026       uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1027 
1028       bin_x1 = scissor_tl & 0xffff;
1029       bin_y1 = scissor_tl >> 16;
1030       bin_x2 = scissor_br & 0xffff;
1031       bin_y2 = scissor_br >> 16;
1032    }
1033 
1034    for (int i = 0; i < options->nquery; i++) {
1035       uint32_t regbase = queryvals[i];
1036       if (reg_written(regbase)) {
1037          uint32_t lastval = reg_val(regbase);
1038          printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
1039                 bin_y1, bin_x2, bin_y2, num_indices);
1040          if (options->gpu_id >= 500)
1041             printf("%s:", render_mode);
1042          printf("\t%08x", lastval);
1043          if (lastval != lastvals[regbase]) {
1044             printf("!");
1045          } else {
1046             printf(" ");
1047          }
1048          if (reg_rewritten(regbase)) {
1049             printf("+");
1050          } else {
1051             printf(" ");
1052          }
1053          dump_register_val(regbase, lastval, 0);
1054          n++;
1055       }
1056    }
1057 
1058    if (n > 1)
1059       printf("\n");
1060 }
1061 
1062 static void
do_query_compare(const char * primtype,uint32_t num_indices)1063 do_query_compare(const char *primtype, uint32_t num_indices)
1064 {
1065    unsigned saved_enable_mask = enable_mask;
1066    const char *saved_render_mode = render_mode;
1067 
1068    /* in 'query-compare' mode, we want to see if the register is writtten
1069     * or changed in any mode:
1070     *
1071     * (NOTE: this could cause false-positive for 'query-delta' if the reg
1072     * is written with different values in binning vs sysmem/gmem mode, as
1073     * we don't track previous values per-mode, but I think we can live with
1074     * that)
1075     */
1076    enable_mask = MODE_ALL;
1077 
1078    clear_rewritten();
1079    load_all_groups(0);
1080 
1081    if (!skip_query()) {
1082       /* dump binning pass values: */
1083       enable_mask = MODE_BINNING;
1084       render_mode = "BINNING";
1085       clear_rewritten();
1086       load_all_groups(0);
1087       __do_query(primtype, num_indices);
1088 
1089       /* dump draw pass values: */
1090       enable_mask = MODE_GMEM | MODE_BYPASS;
1091       render_mode = "DRAW";
1092       clear_rewritten();
1093       load_all_groups(0);
1094       __do_query(primtype, num_indices);
1095 
1096       printf("\n");
1097    }
1098 
1099    enable_mask = saved_enable_mask;
1100    render_mode = saved_render_mode;
1101 
1102    disable_all_groups();
1103 }
1104 
1105 /* well, actually query and script..
1106  * NOTE: call this before dump_register_summary()
1107  */
1108 static void
do_query(const char * primtype,uint32_t num_indices)1109 do_query(const char *primtype, uint32_t num_indices)
1110 {
1111    if (script_draw)
1112       script_draw(primtype, num_indices);
1113 
1114    if (options->query_compare) {
1115       do_query_compare(primtype, num_indices);
1116       return;
1117    }
1118 
1119    if (skip_query())
1120       return;
1121 
1122    __do_query(primtype, num_indices);
1123 }
1124 
1125 static void
cp_im_loadi(uint32_t * dwords,uint32_t sizedwords,int level)1126 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1127 {
1128    uint32_t start = dwords[1] >> 16;
1129    uint32_t size = dwords[1] & 0xffff;
1130    const char *type = NULL, *ext = NULL;
1131    gl_shader_stage disasm_type;
1132 
1133    switch (dwords[0]) {
1134    case 0:
1135       type = "vertex";
1136       ext = "vo";
1137       disasm_type = MESA_SHADER_VERTEX;
1138       break;
1139    case 1:
1140       type = "fragment";
1141       ext = "fo";
1142       disasm_type = MESA_SHADER_FRAGMENT;
1143       break;
1144    default:
1145       type = "<unknown>";
1146       disasm_type = 0;
1147       break;
1148    }
1149 
1150    printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
1151           size);
1152    disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);
1153 
1154    /* dump raw shader: */
1155    if (ext)
1156       dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1157 }
1158 
/* Handle a wide register write: the low 16 bits of dwords[0] give the
 * first register, and dwords[1..sizedwords-1] are the values for that
 * register and the ones following it.  Each value is printed first and
 * then recorded with reg_set().
 */
static void
cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
   const uint32_t first_reg = dwords[0] & 0xffff;

   for (uint32_t n = 1; n < sizedwords; n++) {
      const uint32_t reg = first_reg + (n - 1);
      const uint32_t value = dwords[n];

      dump_register(reg, value, level + 1);
      reg_set(reg, value);
   }
}
1170 
/* Kind of state carried by a load-state packet.  Zero is deliberately not
 * a member, so a zero-initialized lookup entry matches none of these.
 */
enum state_t {
   TEX_SAMP = 1,
   TEX_CONST = 2,
   TEX_MIPADDR = 3, /* a3xx only */
   SHADER_PROG = 4,
   SHADER_CONST = 5,

   /* image/ssbo state: */
   SSBO_0 = 6,
   SSBO_1 = 7,
   SSBO_2 = 8,

   UBO = 9,

   /* unknown things, which are just hexdumped: */
   UNKNOWN_DWORDS = 10,
   UNKNOWN_2DWORDS = 11,
   UNKNOWN_4DWORDS = 12,
};
1190 
/* State block ids used as the row index of the a3xx load-state lookup. */
enum adreno_state_block {
   SB_VERT_TEX = 0x0,
   SB_VERT_MIPADDR = 0x1,
   SB_FRAG_TEX = 0x2,
   SB_FRAG_MIPADDR = 0x3,
   SB_VERT_SHADER = 0x4,
   SB_GEOM_SHADER = 0x5,
   SB_FRAG_SHADER = 0x6,
   SB_COMPUTE_SHADER = 0x7,
};
1201 
1202 /* TODO there is probably a clever way to let rnndec parse things so
1203  * we don't have to care about packet format differences across gens
1204  */
1205 
1206 static void
a3xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1207 a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1208                     enum state_t *state, enum state_src_t *src)
1209 {
1210    unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1211    unsigned state_type = dwords[1] & 0x3;
1212    static const struct {
1213       gl_shader_stage stage;
1214       enum state_t state;
1215    } lookup[0xf][0x3] = {
1216       [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1217       [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1218       [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1219       [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1220       [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1221       [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1222       [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1223       [SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1224    };
1225 
1226    *stage = lookup[state_block_id][state_type].stage;
1227    *state = lookup[state_block_id][state_type].state;
1228    unsigned state_src = (dwords[0] >> 16) & 0x7;
1229    if (state_src == 0 /* SS_DIRECT */)
1230       *src = STATE_SRC_DIRECT;
1231    else
1232       *src = STATE_SRC_INDIRECT;
1233 }
1234 
1235 static enum state_src_t
_get_state_src(unsigned dword0)1236 _get_state_src(unsigned dword0)
1237 {
1238    switch ((dword0 >> 16) & 0x3) {
1239    case 0: /* SS4_DIRECT / SS6_DIRECT */
1240       return STATE_SRC_DIRECT;
1241    case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1242       return STATE_SRC_INDIRECT;
1243    case 1: /* SS6_BINDLESS */
1244       return STATE_SRC_BINDLESS;
1245    default:
1246       return STATE_SRC_DIRECT;
1247    }
1248 }
1249 
1250 static void
_get_state_type(unsigned state_block_id,unsigned state_type,gl_shader_stage * stage,enum state_t * state)1251 _get_state_type(unsigned state_block_id, unsigned state_type,
1252                 gl_shader_stage *stage, enum state_t *state)
1253 {
1254    static const struct {
1255       gl_shader_stage stage;
1256       enum state_t state;
1257    } lookup[0x10][0x4] = {
1258       // SB4_VS_TEX:
1259       [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1260       [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1261       [0x0][2] = {MESA_SHADER_VERTEX, UBO},
1262       // SB4_HS_TEX:
1263       [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
1264       [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
1265       [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
1266       // SB4_DS_TEX:
1267       [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
1268       [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
1269       [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
1270       // SB4_GS_TEX:
1271       [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
1272       [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
1273       [0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
1274       // SB4_FS_TEX:
1275       [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1276       [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1277       [0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
1278       // SB4_CS_TEX:
1279       [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
1280       [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
1281       [0x5][2] = {MESA_SHADER_COMPUTE, UBO},
1282       // SB4_VS_SHADER:
1283       [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1284       [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1285       [0x8][2] = {MESA_SHADER_VERTEX, UBO},
1286       // SB4_HS_SHADER
1287       [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
1288       [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
1289       [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
1290       // SB4_DS_SHADER
1291       [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
1292       [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
1293       [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
1294       // SB4_GS_SHADER
1295       [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
1296       [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
1297       [0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
1298       // SB4_FS_SHADER:
1299       [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1300       [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1301       [0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
1302       // SB4_CS_SHADER:
1303       [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
1304       [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
1305       [0xd][2] = {MESA_SHADER_COMPUTE, UBO},
1306       [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
1307       // SB4_SSBO (shared across all stages)
1308       [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
1309       [0xe][1] = {0, SSBO_1},
1310       [0xe][2] = {0, SSBO_2},
1311       // SB4_CS_SSBO
1312       [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
1313       [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
1314       [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
1315       // unknown things
1316       /* This looks like combined UBO state for 3d stages (a5xx and
1317        * before??  I think a6xx has UBO state per shader stage:
1318        */
1319       [0x6][2] = {0, UBO},
1320       [0x7][1] = {0, UNKNOWN_2DWORDS},
1321    };
1322 
1323    *stage = lookup[state_block_id][state_type].stage;
1324    *state = lookup[state_block_id][state_type].state;
1325 }
1326 
1327 static void
a4xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1328 a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1329                     enum state_t *state, enum state_src_t *src)
1330 {
1331    unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1332    unsigned state_type = dwords[1] & 0x3;
1333    _get_state_type(state_block_id, state_type, stage, state);
1334    *src = _get_state_src(dwords[0]);
1335 }
1336 
1337 static void
a6xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1338 a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1339                     enum state_t *state, enum state_src_t *src)
1340 {
1341    unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1342    unsigned state_type = (dwords[0] >> 14) & 0x3;
1343    _get_state_type(state_block_id, state_type, stage, state);
1344    *src = _get_state_src(dwords[0]);
1345 }
1346 
1347 static void
dump_tex_samp(uint32_t * texsamp,enum state_src_t src,int num_unit,int level)1348 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1349 {
1350    for (int i = 0; i < num_unit; i++) {
1351       /* work-around to reduce noise for opencl blob which always
1352        * writes the max # regardless of # of textures used
1353        */
1354       if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1355          break;
1356 
1357       if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1358          dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
1359          dump_hex(texsamp, 2, level + 1);
1360          texsamp += 2;
1361       } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1362          dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
1363          dump_hex(texsamp, 2, level + 1);
1364          texsamp += 2;
1365       } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1366          dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
1367          dump_hex(texsamp, 4, level + 1);
1368          texsamp += 4;
1369       } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1370          dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
1371          dump_hex(texsamp, 4, level + 1);
1372          texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1373       }
1374    }
1375 }
1376 
1377 static void
dump_tex_const(uint32_t * texconst,int num_unit,int level)1378 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1379 {
1380    for (int i = 0; i < num_unit; i++) {
1381       /* work-around to reduce noise for opencl blob which always
1382        * writes the max # regardless of # of textures used
1383        */
1384       if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
1385           (texconst[2] == 0) && (texconst[3] == 0))
1386          break;
1387 
1388       if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1389          dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
1390          dump_hex(texconst, 4, level + 1);
1391          texconst += 4;
1392       } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1393          dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
1394          if (options->dump_textures) {
1395             uint32_t addr = texconst[4] & ~0x1f;
1396             dump_gpuaddr(addr, level - 2);
1397          }
1398          dump_hex(texconst, 8, level + 1);
1399          texconst += 8;
1400       } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1401          dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
1402          if (options->dump_textures) {
1403             uint64_t addr =
1404                (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1405             dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1406          }
1407          dump_hex(texconst, 12, level + 1);
1408          texconst += 12;
1409       } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1410          dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
1411          if (options->dump_textures) {
1412             uint64_t addr =
1413                (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1414             dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1415          }
1416          dump_hex(texconst, 16, level + 1);
1417          texconst += 16;
1418       }
1419    }
1420 }
1421 
/* Decode a load-state packet and dump its payload.
 *
 * The unit count is taken from bits 22..30 of dwords[0]; shader stage,
 * state kind and state source are extracted by the per-generation
 * *_get_state_type() helper selected by options->gpu_id.  The payload is
 * either inline (directly after the packet header) or located through a
 * GPU address, depending on the state source.  How the payload is printed
 * depends on the state kind.
 *
 * Nothing is done when quiet(2) is set and no script is active, or when
 * the payload cannot be resolved to a host pointer.
 */
static void
cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
{
   gl_shader_stage stage;
   enum state_t state;
   enum state_src_t src;
   uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
   uint64_t ext_src_addr;
   void *contents;
   int i;

   if (quiet(2) && !options->script)
      return;

   if (options->gpu_id >= 600)
      a6xx_get_state_type(dwords, &stage, &state, &src);
   else if (options->gpu_id >= 400)
      a4xx_get_state_type(dwords, &stage, &state, &src);
   else
      a3xx_get_state_type(dwords, &stage, &state, &src);

   /* work out the external address of the payload (0 == inline): */
   switch (src) {
   case STATE_SRC_DIRECT:
      ext_src_addr = 0;
      break;
   case STATE_SRC_INDIRECT:
      /* address is in the packet itself, low two bits masked off: */
      if (is_64b()) {
         ext_src_addr = dwords[1] & 0xfffffffc;
         ext_src_addr |= ((uint64_t)dwords[2]) << 32;
      } else {
         ext_src_addr = dwords[1] & 0xfffffffc;
      }

      break;
   case STATE_SRC_BINDLESS: {
      /* address comes from a bindless base register; the top four bits
       * of dwords[1] select which one, and the low 24 bits give an
       * offset in dwords from that base:
       */
      const unsigned base_reg = stage == MESA_SHADER_COMPUTE
                                   ? regbase("HLSQ_CS_BINDLESS_BASE[0].ADDR")
                                   : regbase("HLSQ_BINDLESS_BASE[0].ADDR");

      if (is_64b()) {
         const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
         ext_src_addr = reg_val(reg) & 0xfffffffc;
         ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
      } else {
         const unsigned reg = base_reg + (dwords[1] >> 28);
         ext_src_addr = reg_val(reg) & 0xfffffffc;
      }

      ext_src_addr += 4 * (dwords[1] & 0xffffff);
      break;
   }
   }

   /* inline payload starts after a 3 dword (64b) or 2 dword header: */
   if (ext_src_addr)
      contents = hostptr(ext_src_addr);
   else
      contents = is_64b() ? dwords + 3 : dwords + 2;

   if (!contents)
      return;

   switch (state) {
   case SHADER_PROG: {
      const char *ext = NULL;

      if (quiet(2))
         return;

      if (options->gpu_id >= 400)
         num_unit *= 16;
      else if (options->gpu_id >= 300)
         num_unit *= 4;

      /* shaders:
       *
       * note: num_unit seems to be # of instruction groups, where
       * an instruction group has 4 64bit instructions.
       */
      if (stage == MESA_SHADER_VERTEX) {
         ext = "vo3";
      } else if (stage == MESA_SHADER_GEOMETRY) {
         ext = "go3";
      } else if (stage == MESA_SHADER_COMPUTE) {
         ext = "co3";
      } else if (stage == MESA_SHADER_FRAGMENT) {
         ext = "fo3";
      }

      /* contents was already checked to be non-NULL above: */
      if (contents)
         try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
                         options->gpu_id);

      /* dump raw shader (only for stages with a known extension): */
      if (ext)
         dump_shader(ext, contents, num_unit * 2 * 4);

      break;
   }
   case SHADER_CONST: {
      if (quiet(2))
         return;

      /* uniforms/consts:
       *
       * note: num_unit seems to be # of pairs of dwords??
       */

      if (options->gpu_id >= 400)
         num_unit *= 2;

      dump_float(contents, num_unit * 2, level + 1);
      dump_hex(contents, num_unit * 2, level + 1);

      break;
   }
   case TEX_MIPADDR: {
      uint32_t *addrs = contents;

      if (quiet(2))
         return;

      /* mipmap consts block just appears to be array of num_unit gpu addr's: */
      for (i = 0; i < num_unit; i++) {
         void *ptr = hostptr(addrs[i]);
         printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
         if (options->dump_textures) {
            printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
            dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
         }
      }
      break;
   }
   case TEX_SAMP: {
      dump_tex_samp(contents, src, num_unit, level);
      break;
   }
   case TEX_CONST: {
      dump_tex_const(contents, num_unit, level);
      break;
   }
   case SSBO_0: {
      uint32_t *ssboconst = (uint32_t *)contents;

      /* 4 dwords per entry, except on a6xx where the entry is 16 dwords: */
      for (i = 0; i < num_unit; i++) {
         int sz = 4;
         if (400 <= options->gpu_id && options->gpu_id < 500) {
            dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
         } else if (500 <= options->gpu_id && options->gpu_id < 600) {
            dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
         } else if (600 <= options->gpu_id && options->gpu_id < 700) {
            sz = 16;
            dump_domain(ssboconst, 16, level + 2, "A6XX_IBO");
         }
         dump_hex(ssboconst, sz, level + 1);
         ssboconst += sz;
      }
      break;
   }
   case SSBO_1: {
      uint32_t *ssboconst = (uint32_t *)contents;

      /* 2 dwords per entry; hexdumped even if no domain matches: */
      for (i = 0; i < num_unit; i++) {
         if (400 <= options->gpu_id && options->gpu_id < 500)
            dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
         else if (500 <= options->gpu_id && options->gpu_id < 600)
            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
         dump_hex(ssboconst, 2, level + 1);
         ssboconst += 2;
      }
      break;
   }
   case SSBO_2: {
      uint32_t *ssboconst = (uint32_t *)contents;

      for (i = 0; i < num_unit; i++) {
         /* TODO a4xx and a5xx might be same: */
         if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
            dump_hex(ssboconst, 2, level + 1);
         }
         /* each entry is read as a gpu address: low dword first, then
          * 17 more address bits from the second dword:
          */
         if (options->dump_textures) {
            uint64_t addr =
               (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
         }
         ssboconst += 2;
      }
      break;
   }
   case UBO: {
      uint32_t *uboconst = (uint32_t *)contents;

      /* 2 dwords are decoded per entry; with bindless state consecutive
       * entries are 16 dwords apart:
       */
      for (i = 0; i < num_unit; i++) {
         // TODO probably similar on a4xx..
         if (500 <= options->gpu_id && options->gpu_id < 600)
            dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
         else if (600 <= options->gpu_id && options->gpu_id < 700)
            dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
         dump_hex(uboconst, 2, level + 1);
         uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
      }
      break;
   }
   case UNKNOWN_DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit, level + 1);
      break;
   }
   case UNKNOWN_2DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit * 2, level + 1);
      break;
   }
   case UNKNOWN_4DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit * 4, level + 1);
      break;
   }
   default:
      if (quiet(2))
         return;
      /* unrecognized state kind: hexdump one dword per unit */
      dump_hex(contents, num_unit, level + 1);
      break;
   }
}
1651 
1652 static void
cp_set_bin(uint32_t * dwords,uint32_t sizedwords,int level)1653 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1654 {
1655    bin_x1 = dwords[1] & 0xffff;
1656    bin_y1 = dwords[1] >> 16;
1657    bin_x2 = dwords[2] & 0xffff;
1658    bin_y2 = dwords[2] >> 16;
1659 }
1660 
1661 static void
dump_a2xx_tex_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1662 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1663                     int level)
1664 {
1665    uint32_t w, h, p;
1666    uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1667    uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1668    static const char *filter[] = {
1669       "point",
1670       "bilinear",
1671       "bicubic",
1672    };
1673    static const char *clamp[] = {
1674       "wrap",
1675       "mirror",
1676       "clamp-last-texel",
1677    };
1678    static const char swiznames[] = "xyzw01??";
1679 
1680    /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1681 
1682    /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1683     * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1684     */
1685    p = (dwords[0] >> 22) << 5;
1686    clamp_x = (dwords[0] >> 10) & 0x3;
1687    clamp_y = (dwords[0] >> 13) & 0x3;
1688    clamp_z = (dwords[0] >> 16) & 0x3;
1689 
1690    /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1691     * NearestClamp=1:OGL Mode
1692     */
1693    parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1694 
1695    /* Width, Height, EndianSwap=0:None */
1696    w = (dwords[2] & 0x1fff) + 1;
1697    h = ((dwords[2] >> 13) & 0x1fff) + 1;
1698 
1699    /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1700     * Mip=2:BaseMap
1701     */
1702    mag = (dwords[3] >> 19) & 0x3;
1703    min = (dwords[3] >> 21) & 0x3;
1704    swiz = (dwords[3] >> 1) & 0xfff;
1705 
1706    /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1707     * Dim3d=0
1708     */
1709    // XXX
1710 
1711    /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1712     * Dim=1:2d, MipPacking=0
1713     */
1714    parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1715 
1716    printf("%sset texture const %04x\n", levels[level], val);
1717    printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
1718           clamp[clamp_y], clamp[clamp_z]);
1719    printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
1720           filter[mag]);
1721    printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
1722           swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1723           swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1724    printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1725           levels[level + 1], gpuaddr, flags, w, h, p,
1726           rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1727    printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
1728           mip_flags);
1729 }
1730 
1731 static void
dump_a2xx_shader_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1732 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1733                        int level)
1734 {
1735    int i;
1736    printf("%sset shader const %04x\n", levels[level], val);
1737    for (i = 0; i < sizedwords;) {
1738       uint32_t gpuaddr, flags;
1739       parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1740       void *addr = hostptr(gpuaddr);
1741       if (addr) {
1742          const char *fmt =
1743             rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1744          uint32_t size = dwords[i++];
1745          printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
1746                 size, fmt);
1747          // TODO maybe dump these as bytes instead of dwords?
1748          size = (size + 3) / 4; // for now convert to dwords
1749          dump_hex(addr, min(size, 64), level + 1);
1750          if (size > min(size, 64))
1751             printf("%s\t\t...\n", levels[level + 1]);
1752          dump_float(addr, min(size, 64), level + 1);
1753          if (size > min(size, 64))
1754             printf("%s\t\t...\n", levels[level + 1]);
1755       }
1756    }
1757 }
1758 
1759 static void
cp_set_const(uint32_t * dwords,uint32_t sizedwords,int level)1760 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1761 {
1762    uint32_t val = dwords[0] & 0xffff;
1763    switch ((dwords[0] >> 16) & 0xf) {
1764    case 0x0:
1765       dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
1766       break;
1767    case 0x1:
1768       /* need to figure out how const space is partitioned between
1769        * attributes, textures, etc..
1770        */
1771       if (val < 0x78) {
1772          dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
1773       } else {
1774          dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
1775       }
1776       break;
1777    case 0x2:
1778       printf("%sset bool const %04x\n", levels[level], val);
1779       break;
1780    case 0x3:
1781       printf("%sset loop const %04x\n", levels[level], val);
1782       break;
1783    case 0x4:
1784       val += 0x2000;
1785       if (dwords[0] & 0x80000000) {
1786          uint32_t srcreg = dwords[1];
1787          uint32_t dstval = dwords[2];
1788 
1789          /* TODO: not sure what happens w/ payload != 2.. */
1790          assert(sizedwords == 3);
1791          assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1792 
1793          /* note: rnn_regname uses a static buf so we can't do
1794           * two regname() calls for one printf..
1795           */
1796          printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1797          printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1798 
1799          dstval += type0_reg_vals[srcreg];
1800 
1801          dump_registers(val, &dstval, 1, level + 1);
1802       } else {
1803          dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
1804       }
1805       break;
1806    }
1807 }
1808 
1809 static void dump_register_summary(int level);
1810 
1811 static void
cp_event_write(uint32_t * dwords,uint32_t sizedwords,int level)1812 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1813 {
1814    const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1815    printl(2, "%sevent %s\n", levels[level], name);
1816 
1817    if (name && (options->gpu_id > 500)) {
1818       char eventname[64];
1819       snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1820       if (!strcmp(name, "BLIT")) {
1821          do_query(eventname, 0);
1822          print_mode(level);
1823          dump_register_summary(level);
1824       }
1825    }
1826 }
1827 
1828 static void
dump_register_summary(int level)1829 dump_register_summary(int level)
1830 {
1831    uint32_t i;
1832    bool saved_summary = summary;
1833    summary = false;
1834 
1835    in_summary = true;
1836 
1837    /* dump current state of registers: */
1838    printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1839    for (i = 0; i < regcnt(); i++) {
1840       uint32_t regbase = i;
1841       uint32_t lastval = reg_val(regbase);
1842       /* skip registers that haven't been updated since last draw/blit: */
1843       if (!(options->allregs || reg_rewritten(regbase)))
1844          continue;
1845       if (!reg_written(regbase))
1846          continue;
1847       if (lastval != lastvals[regbase]) {
1848          printl(2, "!");
1849          lastvals[regbase] = lastval;
1850       } else {
1851          printl(2, " ");
1852       }
1853       if (reg_rewritten(regbase)) {
1854          printl(2, "+");
1855       } else {
1856          printl(2, " ");
1857       }
1858       printl(2, "\t%08x", lastval);
1859       if (!quiet(2)) {
1860          dump_register(regbase, lastval, level);
1861       }
1862    }
1863 
1864    clear_rewritten();
1865 
1866    in_summary = false;
1867 
1868    draw_count++;
1869    summary = saved_summary;
1870 }
1871 
1872 static uint32_t
draw_indx_common(uint32_t * dwords,int level)1873 draw_indx_common(uint32_t *dwords, int level)
1874 {
1875    uint32_t prim_type = dwords[1] & 0x1f;
1876    uint32_t source_select = (dwords[1] >> 6) & 0x3;
1877    uint32_t num_indices = dwords[2];
1878    const char *primtype;
1879 
1880    primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1881 
1882    do_query(primtype, num_indices);
1883 
1884    printl(2, "%sdraw:          %d\n", levels[level], draws[ib]);
1885    printl(2, "%sprim_type:     %s (%d)\n", levels[level], primtype, prim_type);
1886    printl(2, "%ssource_select: %s (%d)\n", levels[level],
1887           rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
1888    printl(2, "%snum_indices:   %d\n", levels[level], num_indices);
1889 
1890    vertices += num_indices;
1891 
1892    draws[ib]++;
1893 
1894    return num_indices;
1895 }
1896 
/*
 * Index size encoding used by the draw decoders, which assemble it from
 * dwords[1] as ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2).
 *
 * Three names share encoding 0; only the 8/16/32 bit names are compared
 * against by the decoders in this file.
 */
enum pc_di_index_size {
   INDEX_SIZE_16_BIT = 0,
   INDEX_SIZE_32_BIT = 1,
   INDEX_SIZE_8_BIT = 2,

   /* aliases of encoding 0: */
   INDEX_SIZE_IGN = INDEX_SIZE_16_BIT,
   INDEX_SIZE_INVALID = INDEX_SIZE_16_BIT,
};
1904 
1905 static void
cp_draw_indx(uint32_t * dwords,uint32_t sizedwords,int level)1906 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1907 {
1908    uint32_t num_indices = draw_indx_common(dwords, level);
1909 
1910    assert(!is_64b());
1911 
1912    /* if we have an index buffer, dump that: */
1913    if (sizedwords == 5) {
1914       void *ptr = hostptr(dwords[3]);
1915       printl(2, "%sgpuaddr:       %08x\n", levels[level], dwords[3]);
1916       printl(2, "%sidx_size:      %d\n", levels[level], dwords[4]);
1917       if (ptr) {
1918          enum pc_di_index_size size =
1919             ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1920          if (!quiet(2)) {
1921             int i;
1922             printf("%sidxs:         ", levels[level]);
1923             if (size == INDEX_SIZE_8_BIT) {
1924                uint8_t *idx = ptr;
1925                for (i = 0; i < dwords[4]; i++)
1926                   printf(" %u", idx[i]);
1927             } else if (size == INDEX_SIZE_16_BIT) {
1928                uint16_t *idx = ptr;
1929                for (i = 0; i < dwords[4] / 2; i++)
1930                   printf(" %u", idx[i]);
1931             } else if (size == INDEX_SIZE_32_BIT) {
1932                uint32_t *idx = ptr;
1933                for (i = 0; i < dwords[4] / 4; i++)
1934                   printf(" %u", idx[i]);
1935             }
1936             printf("\n");
1937             dump_hex(ptr, dwords[4] / 4, level + 1);
1938          }
1939       }
1940    }
1941 
1942    /* don't bother dumping registers for the dummy draw_indx's.. */
1943    if (num_indices > 0)
1944       dump_register_summary(level);
1945 
1946    needs_wfi = true;
1947 }
1948 
1949 static void
cp_draw_indx_2(uint32_t * dwords,uint32_t sizedwords,int level)1950 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
1951 {
1952    uint32_t num_indices = draw_indx_common(dwords, level);
1953    enum pc_di_index_size size =
1954       ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1955    void *ptr = &dwords[3];
1956    int sz = 0;
1957 
1958    assert(!is_64b());
1959 
1960    /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
1961    if (!quiet(2)) {
1962       int i;
1963       printf("%sidxs:         ", levels[level]);
1964       if (size == INDEX_SIZE_8_BIT) {
1965          uint8_t *idx = ptr;
1966          for (i = 0; i < num_indices; i++)
1967             printf(" %u", idx[i]);
1968          sz = num_indices;
1969       } else if (size == INDEX_SIZE_16_BIT) {
1970          uint16_t *idx = ptr;
1971          for (i = 0; i < num_indices; i++)
1972             printf(" %u", idx[i]);
1973          sz = num_indices * 2;
1974       } else if (size == INDEX_SIZE_32_BIT) {
1975          uint32_t *idx = ptr;
1976          for (i = 0; i < num_indices; i++)
1977             printf(" %u", idx[i]);
1978          sz = num_indices * 4;
1979       }
1980       printf("\n");
1981       dump_hex(ptr, sz / 4, level + 1);
1982    }
1983 
1984    /* don't bother dumping registers for the dummy draw_indx's.. */
1985    if (num_indices > 0)
1986       dump_register_summary(level);
1987 }
1988 
1989 static void
cp_draw_indx_offset(uint32_t * dwords,uint32_t sizedwords,int level)1990 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
1991 {
1992    uint32_t num_indices = dwords[2];
1993    uint32_t prim_type = dwords[0] & 0x1f;
1994 
1995    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
1996    print_mode(level);
1997 
1998    /* don't bother dumping registers for the dummy draw_indx's.. */
1999    if (num_indices > 0)
2000       dump_register_summary(level);
2001 }
2002 
2003 static void
cp_draw_indx_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2004 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2005 {
2006    uint32_t prim_type = dwords[0] & 0x1f;
2007    uint64_t addr;
2008 
2009    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2010    print_mode(level);
2011 
2012    if (is_64b())
2013       addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2014    else
2015       addr = dwords[1];
2016    dump_gpuaddr_size(addr, level, 0x10, 2);
2017 
2018    if (is_64b())
2019       addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
2020    else
2021       addr = dwords[3];
2022    dump_gpuaddr_size(addr, level, 0x10, 2);
2023 
2024    dump_register_summary(level);
2025 }
2026 
2027 static void
cp_draw_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2028 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2029 {
2030    uint32_t prim_type = dwords[0] & 0x1f;
2031    uint64_t addr;
2032 
2033    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2034    print_mode(level);
2035 
2036    addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2037    dump_gpuaddr_size(addr, level, 0x10, 2);
2038 
2039    dump_register_summary(level);
2040 }
2041 
2042 static void
cp_draw_indirect_multi(uint32_t * dwords,uint32_t sizedwords,int level)2043 cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2044 {
2045    uint32_t prim_type = dwords[0] & 0x1f;
2046    uint32_t count = dwords[2];
2047 
2048    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2049    print_mode(level);
2050 
2051    struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2052    uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2053    uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2054    uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2055 
2056    if (count_dword) {
2057       uint64_t count_addr =
2058          ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2059       uint32_t *buf = hostptr(count_addr);
2060 
2061       /* Don't print more draws than this if we don't know the indirect
2062        * count. It's possible the user will give ~0 or some other large
2063        * value, expecting the GPU to fill in the draw count, and we don't
2064        * want to print a gazillion draws in that case:
2065        */
2066       const uint32_t max_draw_count = 0x100;
2067 
2068       /* Assume the indirect count is garbage if it's larger than this
2069        * (quite large) value or 0. Hopefully this catches most cases.
2070        */
2071       const uint32_t max_indirect_draw_count = 0x10000;
2072 
2073       if (buf) {
2074          printf("%sindirect count: %u\n", levels[level], *buf);
2075          if (*buf == 0 || *buf > max_indirect_draw_count) {
2076             /* garbage value */
2077             count = min(count, max_draw_count);
2078          } else {
2079             /* not garbage */
2080             count = min(count, *buf);
2081          }
2082       } else {
2083          count = min(count, max_draw_count);
2084       }
2085    }
2086 
2087    if (addr_dword && stride_dword) {
2088       uint64_t addr =
2089          ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2090       uint32_t stride = dwords[stride_dword];
2091 
2092       for (unsigned i = 0; i < count; i++, addr += stride) {
2093          printf("%sdraw %d:\n", levels[level], i);
2094          dump_gpuaddr_size(addr, level, 0x10, 2);
2095       }
2096    }
2097 
2098    dump_register_summary(level);
2099 }
2100 
/*
 * Decode a CP_RUN_OPENCL packet: the packet payload is not inspected, the
 * dispatch is reported to do_query() as "COMPUTE" and the register summary
 * is dumped.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("COMPUTE", 1);
   dump_register_summary(level);
}
2107 
2108 static void
cp_nop(uint32_t * dwords,uint32_t sizedwords,int level)2109 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2110 {
2111    const char *buf = (void *)dwords;
2112    int i;
2113 
2114    if (quiet(3))
2115       return;
2116 
2117    // blob doesn't use CP_NOP for string_marker but it does
2118    // use it for things that end up looking like, but aren't
2119    // ascii chars:
2120    if (!options->decode_markers)
2121       return;
2122 
2123    for (i = 0; i < 4 * sizedwords; i++) {
2124       if (buf[i] == '\0')
2125          break;
2126       if (isascii(buf[i]))
2127          printf("%c", buf[i]);
2128    }
2129    printf("\n");
2130 }
2131 
2132 static void
cp_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2133 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2134 {
2135    /* traverse indirect buffers */
2136    uint64_t ibaddr;
2137    uint32_t ibsize;
2138    uint32_t *ptr = NULL;
2139 
2140    if (is_64b()) {
2141       /* a5xx+.. high 32b of gpu addr, then size: */
2142       ibaddr = dwords[0];
2143       ibaddr |= ((uint64_t)dwords[1]) << 32;
2144       ibsize = dwords[2];
2145    } else {
2146       ibaddr = dwords[0];
2147       ibsize = dwords[1];
2148    }
2149 
2150    if (!quiet(3)) {
2151       if (is_64b()) {
2152          printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
2153       } else {
2154          printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2155       }
2156       printf("%sibsize:%08x\n", levels[level], ibsize);
2157    }
2158 
2159    if (options->once && has_dumped(ibaddr, enable_mask))
2160       return;
2161 
2162    /* 'query-compare' mode implies 'once' mode, although we need only to
2163     * process the cmdstream for *any* enable_mask mode, since we are
2164     * comparing binning vs draw reg values at the same time, ie. it is
2165     * not useful to process the same draw in both binning and draw pass.
2166     */
2167    if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2168       return;
2169 
2170    /* map gpuaddr back to hostptr: */
2171    ptr = hostptr(ibaddr);
2172 
2173    if (ptr) {
2174       /* If the GPU hung within the target IB, the trigger point will be
2175        * just after the current CP_INDIRECT_BUFFER.  Because the IB is
2176        * executed but never returns.  Account for this by checking if
2177        * the IB returned:
2178        */
2179       highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
2180 
2181       ib++;
2182       ibs[ib].base = ibaddr;
2183       ibs[ib].size = ibsize;
2184 
2185       dump_commands(ptr, ibsize, level);
2186       ib--;
2187    } else {
2188       fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2189    }
2190 }
2191 
2192 static void
cp_start_bin(uint32_t * dwords,uint32_t sizedwords,int level)2193 cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level)
2194 {
2195    uint64_t ibaddr;
2196    uint32_t ibsize;
2197    uint32_t loopcount;
2198    uint32_t *ptr = NULL;
2199 
2200    loopcount = dwords[0];
2201    ibaddr = dwords[1];
2202    ibaddr |= ((uint64_t)dwords[2]) << 32;
2203    ibsize = dwords[3];
2204 
2205    /* map gpuaddr back to hostptr: */
2206    ptr = hostptr(ibaddr);
2207 
2208    if (ptr) {
2209       /* If the GPU hung within the target IB, the trigger point will be
2210        * just after the current CP_START_BIN.  Because the IB is
2211        * executed but never returns.  Account for this by checking if
2212        * the IB returned:
2213        */
2214       highlight_gpuaddr(gpuaddr(&dwords[5]));
2215 
2216       /* TODO: we should duplicate the body of the loop after each bin, so
2217        * that draws get the correct state. We should also figure out if there
2218        * are any registers that can tell us what bin we're in when we hang so
2219        * that crashdec points to the right place.
2220        */
2221       ib++;
2222       for (uint32_t i = 0; i < loopcount; i++) {
2223          ibs[ib].base = ibaddr;
2224          ibs[ib].size = ibsize;
2225          printf("%sbin %u\n", levels[level], i);
2226          dump_commands(ptr, ibsize, level);
2227          ibaddr += ibsize;
2228          ptr += ibsize;
2229       }
2230       ib--;
2231    } else {
2232       fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2233    }
2234 }
2235 
2236 static void
cp_wfi(uint32_t * dwords,uint32_t sizedwords,int level)2237 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2238 {
2239    needs_wfi = false;
2240 }
2241 
2242 static void
cp_mem_write(uint32_t * dwords,uint32_t sizedwords,int level)2243 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2244 {
2245    if (quiet(2))
2246       return;
2247 
2248    if (is_64b()) {
2249       uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2250       printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2251       dump_hex(&dwords[2], sizedwords - 2, level + 1);
2252 
2253       if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2254          dump_commands(&dwords[2], sizedwords - 2, level + 1);
2255    } else {
2256       uint32_t gpuaddr = dwords[0];
2257       printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2258       dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
2259    }
2260 }
2261 
2262 static void
cp_rmw(uint32_t * dwords,uint32_t sizedwords,int level)2263 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2264 {
2265    uint32_t val = dwords[0] & 0xffff;
2266    uint32_t and = dwords[1];
2267    uint32_t or = dwords[2];
2268    printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
2269           and, or);
2270    if (needs_wfi)
2271       printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
2272              and, or);
2273    reg_set(val, (reg_val(val) & and) | or);
2274 }
2275 
2276 static void
cp_reg_mem(uint32_t * dwords,uint32_t sizedwords,int level)2277 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2278 {
2279    uint32_t val = dwords[0] & 0xffff;
2280    printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2281 
2282    if (quiet(2))
2283       return;
2284 
2285    uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2286    printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2287    void *ptr = hostptr(gpuaddr);
2288    if (ptr) {
2289       uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2290       dump_hex(ptr, cnt, level + 1);
2291    }
2292 }
2293 
/*
 * One draw-state group as recorded by cp_set_draw_state() and replayed by
 * load_group().
 */
struct draw_state {
   uint16_t enable_mask; /* tested against the global enable_mask on gpu_id >= 600 */
   uint16_t flags;       /* FLAG_* bits taken from the packet header */
   uint32_t count;       /* size of the group's command stream, in dwords; 0 = empty slot */
   uint64_t addr;        /* GPU address of the group's command stream */
};

/* draw-state groups, indexed by the 5-bit group id from the packet */
struct draw_state state[32];

/* flag bits found in bits 19:16 of a CP_SET_DRAW_STATE header dword: */
#define FLAG_DIRTY              (1 << 0)
#define FLAG_DISABLE            (1 << 1)
#define FLAG_DISABLE_ALL_GROUPS (1 << 2)
#define FLAG_LOAD_IMMED         (1 << 3)

/* last value set by CP_SET_MODE (see cp_set_mode()) */
static int draw_mode;
2309 
2310 static void
disable_group(unsigned group_id)2311 disable_group(unsigned group_id)
2312 {
2313    struct draw_state *ds = &state[group_id];
2314    memset(ds, 0, sizeof(*ds));
2315 }
2316 
2317 static void
disable_all_groups(void)2318 disable_all_groups(void)
2319 {
2320    for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2321       disable_group(i);
2322 }
2323 
2324 static void
load_group(unsigned group_id,int level)2325 load_group(unsigned group_id, int level)
2326 {
2327    struct draw_state *ds = &state[group_id];
2328 
2329    if (!ds->count)
2330       return;
2331 
2332    printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2333    printl(2, "%scount: %d\n", levels[level], ds->count);
2334    printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2335    printl(2, "%sflags: %x\n", levels[level], ds->flags);
2336 
2337    if (options->gpu_id >= 600) {
2338       printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2339 
2340       if (!(ds->enable_mask & enable_mask)) {
2341          printl(2, "%s\tskipped!\n\n", levels[level]);
2342          return;
2343       }
2344    }
2345 
2346    void *ptr = hostptr(ds->addr);
2347    if (ptr) {
2348       if (!quiet(2))
2349          dump_hex(ptr, ds->count, level + 1);
2350 
2351       ib++;
2352       dump_commands(ptr, ds->count, level + 1);
2353       ib--;
2354    }
2355 }
2356 
2357 static void
load_all_groups(int level)2358 load_all_groups(int level)
2359 {
2360    /* sanity check, we should never recursively hit recursion here, and if
2361     * we do bad things happen:
2362     */
2363    static bool loading_groups = false;
2364    if (loading_groups) {
2365       printf("ERROR: nothing in draw state should trigger recursively loading "
2366              "groups!\n");
2367       return;
2368    }
2369    loading_groups = true;
2370    for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2371       load_group(i, level);
2372    loading_groups = false;
2373 
2374    /* in 'query-compare' mode, defer disabling all groups until we have a
2375     * chance to process the query:
2376     */
2377    if (!options->query_compare)
2378       disable_all_groups();
2379 }
2380 
2381 static void
cp_set_draw_state(uint32_t * dwords,uint32_t sizedwords,int level)2382 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2383 {
2384    uint32_t i;
2385 
2386    for (i = 0; i < sizedwords;) {
2387       struct draw_state *ds;
2388       uint32_t count = dwords[i] & 0xffff;
2389       uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2390       uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2391       uint32_t flags = (dwords[i] >> 16) & 0xf;
2392       uint64_t addr;
2393 
2394       if (is_64b()) {
2395          addr = dwords[i + 1];
2396          addr |= ((uint64_t)dwords[i + 2]) << 32;
2397          i += 3;
2398       } else {
2399          addr = dwords[i + 1];
2400          i += 2;
2401       }
2402 
2403       if (flags & FLAG_DISABLE_ALL_GROUPS) {
2404          disable_all_groups();
2405          continue;
2406       }
2407 
2408       if (flags & FLAG_DISABLE) {
2409          disable_group(group_id);
2410          continue;
2411       }
2412 
2413       assert(group_id < ARRAY_SIZE(state));
2414       disable_group(group_id);
2415 
2416       ds = &state[group_id];
2417 
2418       ds->enable_mask = enable_mask;
2419       ds->flags = flags;
2420       ds->count = count;
2421       ds->addr = addr;
2422 
2423       if (flags & FLAG_LOAD_IMMED) {
2424          load_group(group_id, level);
2425          disable_group(group_id);
2426       }
2427    }
2428 }
2429 
2430 static void
cp_set_mode(uint32_t * dwords,uint32_t sizedwords,int level)2431 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2432 {
2433    draw_mode = dwords[0];
2434 }
2435 
/*
 * Decode a CP_EXEC_CS packet (execute compute shader): the payload is not
 * inspected, the dispatch is reported to do_query() as "compute" and the
 * register summary is dumped.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("compute", 0);
   dump_register_summary(level);
}
2443 
2444 static void
cp_exec_cs_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2445 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2446 {
2447    uint64_t addr;
2448 
2449    if (is_64b()) {
2450       addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2451    } else {
2452       addr = dwords[1];
2453    }
2454 
2455    printl(3, "%saddr: %016llx\n", levels[level], addr);
2456    dump_gpuaddr_size(addr, level, 0x10, 2);
2457 
2458    do_query("compute", 0);
2459    dump_register_summary(level);
2460 }
2461 
2462 static void
cp_set_marker(uint32_t * dwords,uint32_t sizedwords,int level)2463 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2464 {
2465    render_mode = rnn_enumname(rnn, "a6xx_marker", dwords[0] & 0xf);
2466 
2467    if (!strcmp(render_mode, "RM6_BINNING")) {
2468       enable_mask = MODE_BINNING;
2469    } else if (!strcmp(render_mode, "RM6_GMEM")) {
2470       enable_mask = MODE_GMEM;
2471    } else if (!strcmp(render_mode, "RM6_BYPASS")) {
2472       enable_mask = MODE_BYPASS;
2473    }
2474 }
2475 
2476 static void
cp_set_render_mode(uint32_t * dwords,uint32_t sizedwords,int level)2477 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2478 {
2479    uint64_t addr;
2480    uint32_t *ptr, len;
2481 
2482    assert(is_64b());
2483 
2484    /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2485     * not sure if this can come in different sizes.
2486     *
2487     * First ptr doesn't seem to be cmdstream, second one does.
2488     *
2489     * Comment from downstream kernel:
2490     *
2491     * SRM -- set render mode (ex binning, direct render etc)
2492     * SRM is set by UMD usually at start of IB to tell CP the type of
2493     * preemption.
2494     * KMD needs to set SRM to NULL to indicate CP that rendering is
2495     * done by IB.
2496     * ------------------------------------------------------------------
2497     *
2498     * Seems to always be one of these two:
2499     * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
2500     * 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
2501     * 001c2000 00000000
2502     *
2503     */
2504 
2505    assert(options->gpu_id >= 500);
2506 
2507    render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2508 
2509    if (sizedwords == 1)
2510       return;
2511 
2512    addr = dwords[1];
2513    addr |= ((uint64_t)dwords[2]) << 32;
2514 
2515    mode = dwords[3];
2516 
2517    dump_gpuaddr(addr, level + 1);
2518 
2519    if (sizedwords == 5)
2520       return;
2521 
2522    assert(sizedwords == 8);
2523 
2524    len = dwords[5];
2525    addr = dwords[6];
2526    addr |= ((uint64_t)dwords[7]) << 32;
2527 
2528    printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2529    printl(3, "%slen:  0x%x\n", levels[level], len);
2530 
2531    ptr = hostptr(addr);
2532 
2533    if (ptr) {
2534       if (!quiet(2)) {
2535          ib++;
2536          dump_commands(ptr, len, level + 1);
2537          ib--;
2538          dump_hex(ptr, len, level + 1);
2539       }
2540    }
2541 }
2542 
2543 static void
cp_compute_checkpoint(uint32_t * dwords,uint32_t sizedwords,int level)2544 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2545 {
2546    uint64_t addr;
2547    uint32_t *ptr, len;
2548 
2549    assert(is_64b());
2550    assert(options->gpu_id >= 500);
2551 
2552    assert(sizedwords == 8);
2553 
2554    addr = dwords[5];
2555    addr |= ((uint64_t)dwords[6]) << 32;
2556    len = dwords[7];
2557 
2558    printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2559    printl(3, "%slen:  0x%x\n", levels[level], len);
2560 
2561    ptr = hostptr(addr);
2562 
2563    if (ptr) {
2564       if (!quiet(2)) {
2565          ib++;
2566          dump_commands(ptr, len, level + 1);
2567          ib--;
2568          dump_hex(ptr, len, level + 1);
2569       }
2570    }
2571 }
2572 
2573 static void
cp_blit(uint32_t * dwords,uint32_t sizedwords,int level)2574 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2575 {
2576    do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2577    print_mode(level);
2578    dump_register_summary(level);
2579 }
2580 
2581 static void
cp_context_reg_bunch(uint32_t * dwords,uint32_t sizedwords,int level)2582 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2583 {
2584    int i;
2585 
2586    /* NOTE: seems to write same reg multiple times.. not sure if different parts
2587     * of these are triggered by the FLUSH_SO_n events?? (if that is what they
2588     * actually are?)
2589     */
2590    bool saved_summary = summary;
2591    summary = false;
2592 
2593    for (i = 0; i < sizedwords; i += 2) {
2594       dump_register(dwords[i + 0], dwords[i + 1], level + 1);
2595       reg_set(dwords[i + 0], dwords[i + 1]);
2596    }
2597 
2598    summary = saved_summary;
2599 }
2600 
/*
 * Decode a CP_REG_WRITE packet: the register is the low 16 bits of
 * dwords[1] and the value is dwords[2].  The write is printed with
 * dump_register() and recorded with reg_set().
 */
static void
cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
   const uint32_t reg = dwords[1] & 0xffff;
   const uint32_t value = dwords[2];

   (void)sizedwords;

   dump_register(reg, value, level + 1);
   reg_set(reg, value);
}
2609 
2610 static void
cp_set_ctxswitch_ib(uint32_t * dwords,uint32_t sizedwords,int level)2611 cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2612 {
2613    uint64_t addr;
2614    uint32_t size = dwords[2] & 0xffff;
2615    void *ptr;
2616 
2617    addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2618 
2619    if (!quiet(3)) {
2620       printf("%saddr=%" PRIx64 "\n", levels[level], addr);
2621    }
2622 
2623    ptr = hostptr(addr);
2624    if (ptr) {
2625       dump_commands(ptr, size, level + 1);
2626    }
2627 }
2628 
2629 static void
cp_skip_ib2_enable_global(uint32_t * dwords,uint32_t sizedwords,int level)2630 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2631 {
2632    skip_ib2_enable_global = dwords[0];
2633 }
2634 
2635 static void
cp_skip_ib2_enable_local(uint32_t * dwords,uint32_t sizedwords,int level)2636 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2637 {
2638    skip_ib2_enable_local = dwords[0];
2639 }
2640 
2641 #define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
2642 static const struct type3_op {
2643    const char *name;
2644    void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
2645    struct {
2646       bool load_all_groups;
2647    } options;
2648 } type3_op[] = {
2649    CP(NOP, cp_nop),
2650    CP(INDIRECT_BUFFER, cp_indirect),
2651    CP(INDIRECT_BUFFER_PFD, cp_indirect),
2652    CP(WAIT_FOR_IDLE, cp_wfi),
2653    CP(REG_RMW, cp_rmw),
2654    CP(REG_TO_MEM, cp_reg_mem),
2655    CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
2656    CP(MEM_WRITE, cp_mem_write),
2657    CP(EVENT_WRITE, cp_event_write),
2658    CP(RUN_OPENCL, cp_run_cl),
2659    CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
2660    CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
2661    CP(SET_CONSTANT, cp_set_const),
2662    CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
2663    CP(WIDE_REG_WRITE, cp_wide_reg_write),
2664 
2665    /* for a3xx */
2666    CP(LOAD_STATE, cp_load_state),
2667    CP(SET_BIN, cp_set_bin),
2668 
2669    /* for a4xx */
2670    CP(LOAD_STATE4, cp_load_state),
2671    CP(SET_DRAW_STATE, cp_set_draw_state),
2672    CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
2673    CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
2674    CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),
2675 
2676    /* for a5xx */
2677    CP(SET_RENDER_MODE, cp_set_render_mode),
2678    CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
2679    CP(BLIT, cp_blit),
2680    CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
2681    CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
2682    CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
2683    CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
2684    CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
2685    CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
2686 
2687    /* for a6xx */
2688    CP(LOAD_STATE6_GEOM, cp_load_state),
2689    CP(LOAD_STATE6_FRAG, cp_load_state),
2690    CP(LOAD_STATE6, cp_load_state),
2691    CP(SET_MODE, cp_set_mode),
2692    CP(SET_MARKER, cp_set_marker),
2693    CP(REG_WRITE, cp_reg_write),
2694 
2695    CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
2696 
2697    CP(START_BIN, cp_start_bin),
2698 };
2699 
/*
 * Handler that ignores its packet; get_type3_op() installs it in the dummy
 * entry it returns for packets that have no dedicated handler.
 */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;
   (void)level;
}
2704 
2705 static const struct type3_op *
get_type3_op(unsigned opc)2706 get_type3_op(unsigned opc)
2707 {
2708    static const struct type3_op dummy_op = {
2709       .fxn = noop_fxn,
2710    };
2711    const char *name = pktname(opc);
2712 
2713    if (!name)
2714       return &dummy_op;
2715 
2716    for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2717       if (!strcmp(name, type3_op[i].name))
2718          return &type3_op[i];
2719 
2720    return &dummy_op;
2721 }
2722 
2723 void
dump_commands(uint32_t * dwords,uint32_t sizedwords,int level)2724 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2725 {
2726    int dwords_left = sizedwords;
2727    uint32_t count = 0; /* dword count including packet header */
2728    uint32_t val;
2729 
2730    //	assert(dwords);
2731    if (!dwords) {
2732       printf("NULL cmd buffer!\n");
2733       return;
2734    }
2735 
2736    assert(ib < ARRAY_SIZE(draws));
2737    draws[ib] = 0;
2738 
2739    while (dwords_left > 0) {
2740 
2741       current_draw_count = draw_count;
2742 
2743       /* hack, this looks like a -1 underflow, in some versions
2744        * when it tries to write zero registers via pkt0
2745        */
2746       //		if ((dwords[0] >> 16) == 0xffff)
2747       //			goto skip;
2748 
2749       if (pkt_is_type0(dwords[0])) {
2750          printl(3, "t0");
2751          count = type0_pkt_size(dwords[0]) + 1;
2752          val = type0_pkt_offset(dwords[0]);
2753          assert(val < regcnt());
2754          printl(3, "%swrite %s%s (%04x)\n", levels[level + 1], regname(val, 1),
2755                 (dwords[0] & 0x8000) ? " (same register)" : "", val);
2756          dump_registers(val, dwords + 1, count - 1, level + 2);
2757          if (!quiet(3))
2758             dump_hex(dwords, count, level + 1);
2759       } else if (pkt_is_type4(dwords[0])) {
2760          /* basically the same(ish) as type0 prior to a5xx */
2761          printl(3, "t4");
2762          count = type4_pkt_size(dwords[0]) + 1;
2763          val = type4_pkt_offset(dwords[0]);
2764          assert(val < regcnt());
2765          printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
2766                 val);
2767          dump_registers(val, dwords + 1, count - 1, level + 2);
2768          if (!quiet(3))
2769             dump_hex(dwords, count, level + 1);
2770 #if 0
2771       } else if (pkt_is_type1(dwords[0])) {
2772          printl(3, "t1");
2773          count = 3;
2774          val = dwords[0] & 0xfff;
2775          printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2776          dump_registers(val, dwords+1, 1, level+2);
2777          val = (dwords[0] >> 12) & 0xfff;
2778          printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2779          dump_registers(val, dwords+2, 1, level+2);
2780          if (!quiet(3))
2781             dump_hex(dwords, count, level+1);
2782       } else if (pkt_is_type2(dwords[0])) {
2783          printl(3, "t2");
2784          printf("%sNOP\n", levels[level+1]);
2785          count = 1;
2786          if (!quiet(3))
2787             dump_hex(dwords, count, level+1);
2788 #endif
2789       } else if (pkt_is_type3(dwords[0])) {
2790          count = type3_pkt_size(dwords[0]) + 1;
2791          val = cp_type3_opcode(dwords[0]);
2792          const struct type3_op *op = get_type3_op(val);
2793          if (op->options.load_all_groups)
2794             load_all_groups(level + 1);
2795          printl(3, "t3");
2796          const char *name = pktname(val);
2797          if (!quiet(2)) {
2798             printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
2799                    rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
2800                    count, (dwords[0] & 0x1) ? " (predicated)" : "");
2801          }
2802          if (name)
2803             dump_domain(dwords + 1, count - 1, level + 2, name);
2804          op->fxn(dwords + 1, count - 1, level + 1);
2805          if (!quiet(2))
2806             dump_hex(dwords, count, level + 1);
2807       } else if (pkt_is_type7(dwords[0])) {
2808          count = type7_pkt_size(dwords[0]) + 1;
2809          val = cp_type7_opcode(dwords[0]);
2810          const struct type3_op *op = get_type3_op(val);
2811          if (op->options.load_all_groups)
2812             load_all_groups(level + 1);
2813          printl(3, "t7");
2814          const char *name = pktname(val);
2815          if (!quiet(2)) {
2816             printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
2817                    rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
2818                    count);
2819          }
2820          if (name) {
2821             /* special hack for two packets that decode the same way
2822              * on a6xx:
2823              */
2824             if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
2825                 !strcmp(name, "CP_LOAD_STATE6_GEOM"))
2826                name = "CP_LOAD_STATE6";
2827             dump_domain(dwords + 1, count - 1, level + 2, name);
2828          }
2829          op->fxn(dwords + 1, count - 1, level + 1);
2830          if (!quiet(2))
2831             dump_hex(dwords, count, level + 1);
2832       } else if (pkt_is_type2(dwords[0])) {
2833          printl(3, "t2");
2834          printl(3, "%snop\n", levels[level + 1]);
2835       } else {
2836          /* for 5xx+ we can do a passable job of looking for start of next valid
2837           * packet: */
2838          if (options->gpu_id >= 500) {
2839             while (dwords_left > 0) {
2840                if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
2841                   break;
2842                printf("bad type! %08x\n", dwords[0]);
2843                dwords++;
2844                dwords_left--;
2845             }
2846          } else {
2847             printf("bad type! %08x\n", dwords[0]);
2848             return;
2849          }
2850       }
2851 
2852       dwords += count;
2853       dwords_left -= count;
2854    }
2855 
2856    if (dwords_left < 0)
2857       printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
2858 }
2859