1 /*
2  * Copyright (c) 2018 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
/*
 * Decoder for "new" GL_OES_get_program_binary format.
 *
 * Overall structure is:
 *
 *   - header at top, contains, amongst other things, offsets of
 *     per shader stage sections.
 *   - per shader stage section (shader_info) starts with a header,
 *     followed by a variable-length list of descriptors.  Each
 *     descriptor has a type/count/size plus offset from the start
 *     of shader_info section where the data is found
 */
36 
37 #include <assert.h>
38 #include <ctype.h>
39 #include <fcntl.h>
40 #include <stddef.h>
41 #include <stdint.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 #include <sys/stat.h>
47 #include <sys/types.h>
48 
49 #include "disasm.h"
50 #include "io.h"
51 #include "redump.h"
52 #include "util.h"
53 
/* Path of the input file; set from the command line in main(). */
const char *infile;
/* --full: dump_program() hex dumps the whole program buffer before decoding
 * it, and RD_TEST sections are printed.
 */
static int dump_full = 0;
/* --dump-offsets: prefix decoded fields with their byte offset from the
 * start of the program buffer.
 */
static int dump_offsets = 0;
/* GPU id handed to the disassembler; replaced by the value of an RD_GPU_ID
 * section when the input contains one.
 */
static int gpu_id = 320;
static int shaderdb = 0; /* output shaderdb style traces to stderr */
59 
/* Decoder state that is threaded through all of the decode_*() helpers. */
struct state {
   char *buf; /* start of the program buffer; file offsets index into it */
   int sz;    /* program size in bytes, used to sanity check offsets */
   int lvl;   /* current nesting level, passed to tab() to indent output */

   /* current shader_info section, some offsets calculated relative to
    * this, rather than relative to start of buffer.
    */
   void *shader;

   /* size of each entry within a shader_descriptor_blk: */
   int desc_size;

   /* label of the stage being decoded ("FRAG", "VERT" or "BVERT"), set by
    * decode_header() and printed in the shaderdb summary line.
    */
   const char *shader_type;
   /* register counts copied from the most recently decoded shader_config
    * record; consumed when the shaderdb summary line is printed.
    */
   int full_regs;
   int half_regs;
};
77 
#define PACKED __attribute__((__packed__))

/*
 * Field decoding helpers.  All of them expect a `struct state *state`
 * variable to be in scope at the point of use, and take a pointer to the
 * struct being decoded (s) plus the name of one of its members (field).
 * Macro parameters are parenthesized so that arbitrary pointer expressions
 * can be passed as `s`.
 */

/* with --dump-offsets, print the offset of field from the start of the
 * program buffer
 */
#define OFF(field)                                                             \
   do {                                                                        \
      if (dump_offsets)                                                        \
         printf("%08x: ", (uint32_t)((char *)&(field) - state->buf));          \
   } while (0)

/* decode field as hex */
#define X(s, field)                                                            \
   do {                                                                        \
      OFF((s)->field);                                                         \
      printf("%s%12s:\t0x%x\n", tab(state->lvl), #field, (s)->field);          \
   } while (0)

/* decode field as digit */
#define D(s, field)                                                            \
   do {                                                                        \
      OFF((s)->field);                                                         \
      printf("%s%12s:\t%u\n", tab(state->lvl), #field, (s)->field);            \
   } while (0)

/* decode field as float/hex */
#define F(s, field)                                                            \
   do {                                                                        \
      OFF((s)->field);                                                         \
      printf("%s%12s:\t%f (0x%0x)\n", tab(state->lvl), #field,                 \
             uif((s)->field), (s)->field);                                     \
   } while (0)

/* decode field as register: (type is 'r' or 'c'), low two bits select the
 * component, the remaining bits the register number
 */
#define R(s, field, type)                                                      \
   do {                                                                        \
      OFF((s)->field);                                                         \
      printf("%s%12s:\t%c%u.%c\n", tab(state->lvl), #field, (type),            \
             ((s)->field >> 2), "xyzw"[(s)->field & 0x3]);                     \
   } while (0)

/* decode inline string (presumably null terminated?) */
#define S(s, field)                                                            \
   do {                                                                        \
      OFF((s)->field);                                                         \
      printf("%s%12s:\t%s\n", tab(state->lvl), #field, (s)->field);            \
   } while (0)

/* decode string-table string */
#define T(s, field) TODO

/* decode field as unknown: start/end are the (hex, inclusive) byte offsets
 * of the first and last dword, matching the unk_<start>_<end> member name
 */
#define U(s, start, end)                                                       \
   dump_unknown(state, (s)->unk_##start##_##end, 0x##start,                    \
                (4 + 0x##end - 0x##start) / 4)

/* decode field as offset (from start of buffer) to other section, and
 * decode that section one indentation level deeper
 */
#define O(s, field, type)                                                      \
   do {                                                                        \
      X(s, field);                                                             \
      assert((s)->field < state->sz);                                          \
      void *_p = &state->buf[(s)->field];                                      \
      state->lvl++;                                                            \
      decode_##type(state, _p);                                                \
      state->lvl--;                                                            \
   } while (0)
141 
/* Forward declarations: decode_header() reaches decode_shader_info() through
 * the O() macro before the struct and the function are defined.
 */
struct shader_info;
static void decode_shader_info(struct state *state, struct shader_info *info);
144 
145 static void
dump_unknown(struct state * state,void * buf,unsigned start,unsigned n)146 dump_unknown(struct state *state, void *buf, unsigned start, unsigned n)
147 {
148    uint32_t *ptr = buf;
149    uint8_t *ascii = buf;
150 
151    for (unsigned i = 0; i < n; i++) {
152       uint32_t d = ptr[i];
153 
154       if (dump_offsets)
155          printf("%08x:", (uint32_t)((char *)&ptr[i] - state->buf));
156 
157       printf("%s        %04x:\t%08x", tab(state->lvl), start + i * 4, d);
158 
159       printf("\t|");
160       for (unsigned j = 0; j < 4; j++) {
161          uint8_t c = *(ascii++);
162          printf("%c", (isascii(c) && !iscntrl(c)) ? c : '.');
163       }
164       printf("|\t%f", uif(d));
165 
166       /* TODO maybe scan for first non-null and non-ascii char starting from
167        * end of shader binary to (roughly) establish the start of the string
168        * table.. that would be a bit better filter for deciding if something
169        * might be a pointer into the string table.  Also, the previous char
170        * to what it points to should probably be null.
171        */
172       if ((d < state->sz) && isascii(state->buf[d]) &&
173           (strlen(&state->buf[d]) > 2) && isascii(state->buf[d + 1]))
174          printf("\t<== %s", &state->buf[d]);
175 
176       printf("\n");
177    }
178 }
179 
/* Layout of the header at the very start of the program binary.  Members
 * named unk_<start>_<end> are not understood yet; start/end are the hex byte
 * offsets of the first and last dword they cover, which is what the U()
 * macro relies on.
 */
struct PACKED header {
   uint32_t version; /* I guess, always b10bcace ? */
   uint32_t unk_0004_0014[5];
   uint32_t size;
   uint32_t size2; /* just to be sure? */
   uint32_t unk_0020_0020[1];
   /* I guess?  Small changes seem to result in big diffs here */
   uint32_t chksum;
   uint32_t unk_0028_0050[11];
   uint32_t fs_info; /* offset of FS shader_info section */
   uint32_t unk_0058_0090[15];
   uint32_t vs_info; /* offset of VS shader_info section */
   uint32_t unk_0098_00b0[7];
   uint32_t vs_info2; /* offset of VS shader_info section (again?) */
   uint32_t unk_00b8_0110[23];
   uint32_t bs_info; /* offset of binning shader_info section */
};
197 
198 static void
decode_header(struct state * state,struct header * hdr)199 decode_header(struct state *state, struct header *hdr)
200 {
201    X(hdr, version);
202    U(hdr, 0004, 0014);
203    X(hdr, size);
204    X(hdr, size2);
205    U(hdr, 0020, 0020);
206    X(hdr, chksum);
207    U(hdr, 0028, 0050);
208    state->shader_type = "FRAG";
209    O(hdr, fs_info, shader_info);
210    U(hdr, 0058, 0090);
211    state->shader_type = "VERT";
212    O(hdr, vs_info, shader_info);
213    U(hdr, 0098, 00b0);
214    assert(hdr->vs_info ==
215           hdr->vs_info2); /* not sure what this if it is ever different */
216    X(hdr, vs_info2);
217    U(hdr, 00b8, 0110);
218    state->shader_type = "BVERT";
219    O(hdr, bs_info, shader_info);
220 
221    /* not sure how much of the rest of contents before start of fs_info
222     * is the header, vs other things.. just dump it all as unknown for
223     * now:
224     */
225    dump_unknown(state, (void *)hdr + sizeof(*hdr), sizeof(*hdr),
226                 (hdr->fs_info - sizeof(*hdr)) / 4);
227 }
228 
/* Record referenced by an ENTRY_POINT descriptor. */
struct PACKED shader_entry_point {
   /* entry point name, ie. "main" of TBD length, followed by unknown */
   char name[8];
};
233 
/* Print an entry point record, which for now is just its inline name. */
static void
decode_shader_entry_point(struct state *state, struct shader_entry_point *e)
{
   S(e, name);
}
239 
/* Known leading part of a SHADER_CONFIG record.  The record can be longer
 * than this; decode_shader_config() dumps the remainder as unknown.
 */
struct PACKED shader_config {
   uint32_t unk_0000_0008[3];
   uint32_t full_regs;
   uint32_t half_regs;
};
245 
246 static void
decode_shader_config(struct state * state,struct shader_config * cfg)247 decode_shader_config(struct state *state, struct shader_config *cfg)
248 {
249    U(cfg, 0000, 0008);
250    D(cfg, full_regs);
251    D(cfg, half_regs);
252 
253    state->full_regs = cfg->full_regs;
254    state->half_regs = cfg->half_regs;
255 
256    /* dump reset of unknown (size differs btwn versions) */
257    dump_unknown(state, (void *)cfg + sizeof(*cfg), sizeof(*cfg),
258                 (state->desc_size - sizeof(*cfg)) / 4);
259 }
260 
/* Record layout shared by the SHADER_INPUT, SHADER_OUTPUT and INTERNAL
 * descriptor types.
 */
struct PACKED shader_io_block {
   /* name of TBD length followed by unknown.. 42 dwords total */
   char name[20];
   uint32_t unk_0014_00a4[37];
};
266 
/* Print one input/output/internal-input record: the inline name followed by
 * the remaining dwords as unknown data.
 */
static void
decode_shader_io_block(struct state *state, struct shader_io_block *io)
{
   S(io, name);
   U(io, 0014, 00a4);
}
273 
/* Record referenced by a CONSTANTS descriptor. */
struct PACKED shader_constant_block {
   uint32_t value; /* printed both as float and as hex */
   uint32_t unk_0004_000c[3];
   uint32_t regid; /* printed as a 'c' register: regid >> 2, component regid & 3 */
   uint32_t unk_0014_0024[5];
};
280 
/* Print one constant record: its value (float and hex), the const register
 * it is associated with, and the not yet understood dwords in between.
 */
static void
decode_shader_constant_block(struct state *state,
                             struct shader_constant_block *c)
{
   F(c, value);
   U(c, 0004, 000c);
   R(c, regid, 'c');
   U(c, 0014, 0024);
}
290 
/* Values of shader_descriptor_block::type, with the record type each one
 * refers to.  Declared as a named enum; the previous trailing identifier
 * defined an unused global variable rather than naming the type.
 */
enum shader_info_block_type {
   ENTRY_POINT = 0,   /* shader_entry_point */
   SHADER_CONFIG = 1, /* XXX placeholder name */
   SHADER_INPUT = 2,  /* shader_io_block */
   SHADER_OUTPUT = 3, /* shader_io_block */
   CONSTANTS = 6,     /* shader_constant_block */
   INTERNAL = 8,      /* internal input, like bary.f coord */
   SHADER = 10,       /* shader instructions, handed to the disassembler */
};
300 
/* Refers to location of some type of records, with an offset relative to
 * start of shader_info block.  The per-record size is size / count.
 */
struct PACKED shader_descriptor_block {
   uint32_t type;   /* block type */
   uint32_t offset; /* offset (relative to start of shader_info block) */
   uint32_t size;   /* size in bytes */
   uint32_t count;  /* number of records */
   uint32_t unk_0010_0010[1];
};
311 
312 static void
decode_shader_descriptor_block(struct state * state,struct shader_descriptor_block * blk)313 decode_shader_descriptor_block(struct state *state,
314                                struct shader_descriptor_block *blk)
315 {
316    D(blk, type);
317    X(blk, offset);
318    D(blk, size);
319    D(blk, count);
320    U(blk, 0010, 0010);
321 
322    /* offset relative to current shader block: */
323    void *ptr = state->shader + blk->offset;
324 
325    if (blk->count == 0) {
326       assert(blk->size == 0);
327    } else {
328       assert((blk->size % blk->count) == 0);
329    }
330 
331    state->desc_size = blk->size / blk->count;
332    state->lvl++;
333    for (unsigned i = 0; i < blk->count; i++) {
334       switch (blk->type) {
335       case ENTRY_POINT:
336          printf("%sentry point %u:\n", tab(state->lvl - 1), i);
337          decode_shader_entry_point(state, ptr);
338          break;
339       case SHADER_CONFIG:
340          printf("%sconfig %u:\n", tab(state->lvl - 1), i);
341          decode_shader_config(state, ptr);
342          break;
343       case SHADER_INPUT:
344          printf("%sinput %u:\n", tab(state->lvl - 1), i);
345          decode_shader_io_block(state, ptr);
346          break;
347       case SHADER_OUTPUT:
348          printf("%soutput %u:\n", tab(state->lvl - 1), i);
349          decode_shader_io_block(state, ptr);
350          break;
351       case INTERNAL:
352          printf("%sinternal input %u:\n", tab(state->lvl - 1), i);
353          decode_shader_io_block(state, ptr);
354          break;
355       case CONSTANTS:
356          printf("%sconstant %u:\n", tab(state->lvl - 1), i);
357          decode_shader_constant_block(state, ptr);
358          break;
359       case SHADER: {
360          struct shader_stats stats;
361          printf("%sshader %u:\n", tab(state->lvl - 1), i);
362          disasm_a3xx_stat(ptr, blk->size / 4, state->lvl, stdout, gpu_id,
363                           &stats);
364          if (shaderdb) {
365             unsigned dwords = 2 * stats.instlen;
366 
367             if (gpu_id >= 400) {
368                dwords = ALIGN(dwords, 16 * 2);
369             } else {
370                dwords = ALIGN(dwords, 4 * 2);
371             }
372 
373             unsigned half_regs = state->half_regs;
374             unsigned full_regs = state->full_regs;
375 
376             /* On a6xx w/ merged/conflicting half and full regs, the
377              * full_regs footprint will be max of full_regs and half
378              * of half_regs.. we only care about which value is higher.
379              */
380             if (gpu_id >= 600) {
381                /* footprint of half_regs in units of full_regs: */
382                unsigned half_full = (half_regs + 1) / 2;
383                if (half_full > full_regs)
384                   full_regs = half_full;
385                half_regs = 0;
386             }
387 
388             fprintf(stderr,
389                     "%s shader: %u inst, %u nops, %u non-nops, %u dwords, "
390                     "%u half, %u full, %u constlen, "
391                     "%u (ss), %u (sy), %d max_sun, %d loops\n",
392                     state->shader_type, stats.instructions, stats.nops,
393                     stats.instructions - stats.nops, dwords, half_regs,
394                     full_regs, stats.constlen, stats.ss, stats.sy, 0,
395                     0); /* max_sun or loops not possible */
396          }
397          /* this is a special case in a way, blk->count is # of
398           * instructions but disasm_a3xx() decodes all instructions,
399           * so just bail.
400           */
401          i = blk->count;
402          break;
403       }
404       default:
405          dump_unknown(state, ptr, 0, state->desc_size / 4);
406          break;
407       }
408       ptr += state->desc_size;
409    }
410    state->lvl--;
411 }
412 
/* there looks like one of these per shader, followed by "main" and
 * some more info, and then the shader itself.  desc_off is relative to the
 * start of this struct and locates an array of num_blocks
 * shader_descriptor_block entries.
 */
struct PACKED shader_info {
   uint32_t unk_0000_0010[5];
   uint32_t desc_off; /* offset to first descriptor block */
   uint32_t num_blocks;
};
421 
422 static void
decode_shader_info(struct state * state,struct shader_info * info)423 decode_shader_info(struct state *state, struct shader_info *info)
424 {
425    assert((info->desc_off % 4) == 0);
426 
427    U(info, 0000, 0010);
428    X(info, desc_off);
429    D(info, num_blocks);
430 
431    dump_unknown(state, &info[1], 0, (info->desc_off - sizeof(*info)) / 4);
432 
433    state->shader = info;
434 
435    struct shader_descriptor_block *blocks = ((void *)info) + info->desc_off;
436    for (unsigned i = 0; i < info->num_blocks; i++) {
437       printf("%sdescriptor %u:\n", tab(state->lvl), i);
438       state->lvl++;
439       decode_shader_descriptor_block(state, &blocks[i]);
440       state->lvl--;
441    }
442 }
443 
444 static void
dump_program(struct state * state)445 dump_program(struct state *state)
446 {
447    struct header *hdr = (void *)state->buf;
448 
449    if (dump_full)
450       dump_unknown(state, state->buf, 0, state->sz / 4);
451 
452    decode_header(state, hdr);
453 }
454 
455 int
main(int argc,char ** argv)456 main(int argc, char **argv)
457 {
458    enum rd_sect_type type = RD_NONE;
459    enum debug_t debug = PRINT_RAW | PRINT_STATS;
460    void *buf = NULL;
461    int sz;
462    struct io *io;
463    int raw_program = 0;
464 
465    /* lame argument parsing: */
466 
467    while (1) {
468       if ((argc > 1) && !strcmp(argv[1], "--verbose")) {
469          debug |= PRINT_RAW | PRINT_VERBOSE;
470          argv++;
471          argc--;
472          continue;
473       }
474       if ((argc > 1) && !strcmp(argv[1], "--expand")) {
475          debug |= EXPAND_REPEAT;
476          argv++;
477          argc--;
478          continue;
479       }
480       if ((argc > 1) && !strcmp(argv[1], "--full")) {
481          /* only short dump, original shader, symbol table, and disassembly */
482          dump_full = 1;
483          argv++;
484          argc--;
485          continue;
486       }
487       if ((argc > 1) && !strcmp(argv[1], "--dump-offsets")) {
488          dump_offsets = 1;
489          argv++;
490          argc--;
491          continue;
492       }
493       if ((argc > 1) && !strcmp(argv[1], "--raw")) {
494          raw_program = 1;
495          argv++;
496          argc--;
497          continue;
498       }
499       if ((argc > 1) && !strcmp(argv[1], "--shaderdb")) {
500          shaderdb = 1;
501          argv++;
502          argc--;
503          continue;
504       }
505       break;
506    }
507 
508    if (argc != 2) {
509       fprintf(stderr, "usage: pgmdump2 [--verbose] [--expand] [--full] "
510                       "[--dump-offsets] [--raw] [--shaderdb] testlog.rd\n");
511       return -1;
512    }
513 
514    disasm_a3xx_set_debug(debug);
515 
516    infile = argv[1];
517 
518    io = io_open(infile);
519    if (!io) {
520       fprintf(stderr, "could not open: %s\n", infile);
521       return -1;
522    }
523 
524    if (raw_program) {
525       io_readn(io, &sz, 4);
526       free(buf);
527 
528       /* note: allow hex dumps to go a bit past the end of the buffer..
529        * might see some garbage, but better than missing the last few bytes..
530        */
531       buf = calloc(1, sz + 3);
532       io_readn(io, buf + 4, sz);
533       (*(int *)buf) = sz;
534 
535       struct state state = {
536          .buf = buf,
537          .sz = sz,
538       };
539       printf("############################################################\n");
540       printf("program:\n");
541       dump_program(&state);
542       printf("############################################################\n");
543       return 0;
544    }
545 
546    /* figure out what sort of input we are dealing with: */
547    if (!(check_extension(infile, ".rd") || check_extension(infile, ".rd.gz"))) {
548       int ret;
549       buf = calloc(1, 100 * 1024);
550       ret = io_readn(io, buf, 100 * 1024);
551       if (ret < 0) {
552          fprintf(stderr, "error: %m");
553          return -1;
554       }
555       return disasm_a3xx(buf, ret / 4, 0, stdout, gpu_id);
556    }
557 
558    while ((io_readn(io, &type, sizeof(type)) > 0) &&
559           (io_readn(io, &sz, 4) > 0)) {
560       free(buf);
561 
562       /* note: allow hex dumps to go a bit past the end of the buffer..
563        * might see some garbage, but better than missing the last few bytes..
564        */
565       buf = calloc(1, sz + 3);
566       io_readn(io, buf, sz);
567 
568       switch (type) {
569       case RD_TEST:
570          if (dump_full)
571             printf("test: %s\n", (char *)buf);
572          break;
573       case RD_VERT_SHADER:
574          printf("vertex shader:\n%s\n", (char *)buf);
575          break;
576       case RD_FRAG_SHADER:
577          printf("fragment shader:\n%s\n", (char *)buf);
578          break;
579       case RD_PROGRAM: {
580          struct state state = {
581             .buf = buf,
582             .sz = sz,
583          };
584          printf(
585             "############################################################\n");
586          printf("program:\n");
587          dump_program(&state);
588          printf(
589             "############################################################\n");
590          break;
591       }
592       case RD_GPU_ID:
593          gpu_id = *((unsigned int *)buf);
594          printf("gpu_id: %d\n", gpu_id);
595          break;
596       default:
597          break;
598       }
599    }
600 
601    io_close(io);
602 
603    return 0;
604 }
605