/*
 * Copyright (C) 2017-2019 Alyssa Rosenzweig
 * Copyright (C) 2017-2019 Connor Abbott
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <genxml/gen_macros.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <stdbool.h>
#include <stdarg.h>
#include <errno.h>
#include <ctype.h>
#include "decode.h"

#include "midgard/disassemble.h"
#include "bifrost/disassemble.h"

#define DUMP_UNPACKED(T, var, ...) { \
        pandecode_log(__VA_ARGS__); \
        pan_print(pandecode_dump_stream, T, var, (pandecode_indent + 1) * 2); \
}

#define DUMP_CL(T, cl, ...) {\
        pan_unpack(cl, T, temp); \
        DUMP_UNPACKED(T, temp, __VA_ARGS__); \
}

#define DUMP_SECTION(A, S, cl, ...) { \
        pan_section_unpack(cl, A, S, temp); \
        pandecode_log(__VA_ARGS__); \
        pan_section_print(pandecode_dump_stream, A, S, temp, (pandecode_indent + 1) * 2); \
}

#define MAP_ADDR(T, addr, cl) \
        const uint8_t *cl = 0; \
        { \
                struct pandecode_mapped_memory *mapped_mem = pandecode_find_mapped_gpu_mem_containing(addr); \
                cl = pandecode_fetch_gpu_mem(mapped_mem, addr, pan_size(T)); \
        }

#define DUMP_ADDR(T, addr, ...) {\
        MAP_ADDR(T, addr, cl) \
        DUMP_CL(T, cl, __VA_ARGS__); \
}
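
/* Illustrative usage only: a sampler at a hypothetical GPU address addr could
 * be dumped with
 *
 *    DUMP_ADDR(SAMPLER, addr, "Sampler:\n");
 *
 * which maps pan_size(SAMPLER) bytes at addr and pretty-prints the unpacked
 * descriptor one level deeper than the current indentation. */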

/* Semantic logging type.
 *
 * Raw: for raw messages to be printed as is.
 * Message: for helpful information to be commented out in replays.
 *
 * Use one of pandecode_log or pandecode_msg as syntax sugar.
 */

enum pandecode_log_type {
        PANDECODE_RAW,
        PANDECODE_MESSAGE,
};

#define pandecode_log(...)  pandecode_log_typed(PANDECODE_RAW,      __VA_ARGS__)
#define pandecode_msg(...)  pandecode_log_typed(PANDECODE_MESSAGE,  __VA_ARGS__)

static unsigned pandecode_indent = 0;

static void
pandecode_make_indent(void)
{
        for (unsigned i = 0; i < pandecode_indent; ++i)
                fprintf(pandecode_dump_stream, "  ");
}

static void PRINTFLIKE(2, 3)
pandecode_log_typed(enum pandecode_log_type type, const char *format, ...)
{
        va_list ap;

        pandecode_make_indent();

        if (type == PANDECODE_MESSAGE)
                fprintf(pandecode_dump_stream, "// ");

        va_start(ap, format);
        vfprintf(pandecode_dump_stream, format, ap);
        va_end(ap);
}

static void
pandecode_log_cont(const char *format, ...)
{
        va_list ap;

        va_start(ap, format);
        vfprintf(pandecode_dump_stream, format, ap);
        va_end(ap);
}
/* To check for memory safety issues, validates that the given pointer in GPU
 * memory is valid, containing at least sz bytes. The goal is to catch
 * GPU-side memory bugs (NULL pointer dereferences, buffer overflows, or
 * buffer overruns) by validating pointers before they are dereferenced.
 */

static void
pandecode_validate_buffer(mali_ptr addr, size_t sz)
{
        if (!addr) {
                pandecode_msg("XXX: null pointer deref\n");
                return;
        }

        /* Find a BO */

        struct pandecode_mapped_memory *bo =
                pandecode_find_mapped_gpu_mem_containing(addr);

        if (!bo) {
                pandecode_msg("XXX: invalid memory dereference\n");
                return;
        }

        /* Bounds check */

        unsigned offset = addr - bo->gpu_va;
        unsigned total = offset + sz;
        if (total > bo->length) {
                pandecode_msg("XXX: buffer overrun. "
                                "Chunk of size %zu at offset %u in buffer of size %zu. "
                                "Overrun by %zu bytes.\n",
                                sz, offset, bo->length, total - bo->length);
                return;
        }
}
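
/* Illustrative numbers: for a BO mapped at gpu_va 0x1000 with length 0x100,
 * validating addr 0x10F0 with sz 0x20 gives offset 0xF0 and total 0x110,
 * which exceeds 0x100: an overrun of 0x10 bytes. */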

#if PAN_ARCH <= 5
/* Midgard's tiler descriptor is embedded within the
 * larger FBD */

static void
pandecode_midgard_tiler_descriptor(
                const struct mali_tiler_context_packed *tp,
                const struct mali_tiler_weights_packed *wp)
{
        pan_unpack(tp, TILER_CONTEXT, t);
        DUMP_UNPACKED(TILER_CONTEXT, t, "Tiler:\n");

        /* We've never seen weights used in practice, but they exist */
        pan_unpack(wp, TILER_WEIGHTS, w);
        bool nonzero_weights = false;

        nonzero_weights |= w.weight0 != 0x0;
        nonzero_weights |= w.weight1 != 0x0;
        nonzero_weights |= w.weight2 != 0x0;
        nonzero_weights |= w.weight3 != 0x0;
        nonzero_weights |= w.weight4 != 0x0;
        nonzero_weights |= w.weight5 != 0x0;
        nonzero_weights |= w.weight6 != 0x0;
        nonzero_weights |= w.weight7 != 0x0;

        if (nonzero_weights)
                DUMP_UNPACKED(TILER_WEIGHTS, w, "Tiler Weights:\n");
}
#endif

/* Information about the framebuffer passed back for
 * additional analysis */

struct pandecode_fbd {
        unsigned width;
        unsigned height;
        unsigned rt_count;
        bool has_extra;
};

#if PAN_ARCH == 4
static struct pandecode_fbd
pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
{
        struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
        const void *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);

        struct pandecode_fbd info = {
                .has_extra = false,
                .rt_count = 1
        };

        pandecode_log("Framebuffer:\n");
        pandecode_indent++;

        DUMP_SECTION(FRAMEBUFFER, LOCAL_STORAGE, s, "Local Storage:\n");
        pan_section_unpack(s, FRAMEBUFFER, PARAMETERS, p);
        DUMP_UNPACKED(FRAMEBUFFER_PARAMETERS, p, "Parameters:\n");

        const void *t = pan_section_ptr(s, FRAMEBUFFER, TILER);
        const void *w = pan_section_ptr(s, FRAMEBUFFER, TILER_WEIGHTS);

        pandecode_midgard_tiler_descriptor(t, w);

        pandecode_indent--;

        /* Dummy unpack of the padding section to make sure all words are 0.
         * No need to call print here since the section is supposed to be empty.
         */
        pan_section_unpack(s, FRAMEBUFFER, PADDING_1, padding1);
        pan_section_unpack(s, FRAMEBUFFER, PADDING_2, padding2);
        pandecode_log("\n");

        return info;
}
#endif

#if PAN_ARCH >= 5
static void
pandecode_local_storage(uint64_t gpu_va, int job_no)
{
        struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
        const struct mali_local_storage_packed *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);
        DUMP_CL(LOCAL_STORAGE, s, "Local Storage:\n");
}

static void
pandecode_render_target(uint64_t gpu_va, unsigned job_no, unsigned gpu_id,
                        const struct MALI_FRAMEBUFFER_PARAMETERS *fb)
{
        pandecode_log("Color Render Targets:\n");
        pandecode_indent++;

        for (int i = 0; i < fb->render_target_count; i++) {
                mali_ptr rt_va = gpu_va + i * pan_size(RENDER_TARGET);
                struct pandecode_mapped_memory *mem =
                        pandecode_find_mapped_gpu_mem_containing(rt_va);
                const struct mali_render_target_packed *PANDECODE_PTR_VAR(rtp, mem, (mali_ptr) rt_va);
                DUMP_CL(RENDER_TARGET, rtp, "Color Render Target %d:\n", i);
        }

        pandecode_indent--;
        pandecode_log("\n");
}
#endif

#if PAN_ARCH >= 6
static void
pandecode_sample_locations(const void *fb, int job_no)
{
        pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);

        struct pandecode_mapped_memory *smem =
                pandecode_find_mapped_gpu_mem_containing(params.sample_locations);

        const u16 *PANDECODE_PTR_VAR(samples, smem, params.sample_locations);

        pandecode_log("Sample locations:\n");
        for (int i = 0; i < 33; i++) {
                pandecode_log("  (%d, %d),\n",
                                samples[2 * i] - 128,
                                samples[2 * i + 1] - 128);
        }
}
#endif

static void
pandecode_dcd(const struct MALI_DRAW *p,
              int job_no, enum mali_job_type job_type,
              char *suffix, unsigned gpu_id);

#if PAN_ARCH >= 5
static struct pandecode_fbd
pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
{
        struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
        const void *PANDECODE_PTR_VAR(fb, mem, (mali_ptr) gpu_va);
        pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);

        struct pandecode_fbd info;

#if PAN_ARCH >= 6
        pandecode_sample_locations(fb, job_no);

        pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, bparams);
        unsigned dcd_size = pan_size(DRAW);
        struct pandecode_mapped_memory *dcdmem =
                pandecode_find_mapped_gpu_mem_containing(bparams.frame_shader_dcds);

        if (bparams.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
                const void *PANDECODE_PTR_VAR(dcd, dcdmem, bparams.frame_shader_dcds + (0 * dcd_size));
                pan_unpack(dcd, DRAW, draw);
                pandecode_log("Pre frame 0:\n");
                pandecode_dcd(&draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
        }

        if (bparams.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
                const void *PANDECODE_PTR_VAR(dcd, dcdmem, bparams.frame_shader_dcds + (1 * dcd_size));
                pan_unpack(dcd, DRAW, draw);
                pandecode_log("Pre frame 1:\n");
                pandecode_dcd(&draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
        }

        if (bparams.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
                const void *PANDECODE_PTR_VAR(dcd, dcdmem, bparams.frame_shader_dcds + (2 * dcd_size));
                pan_unpack(dcd, DRAW, draw);
                pandecode_log("Post frame:\n");
                pandecode_dcd(&draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
        }
#endif

        pandecode_log("Multi-Target Framebuffer:\n");
        pandecode_indent++;

#if PAN_ARCH <= 5
        DUMP_SECTION(FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n");
#endif

        info.width = params.width;
        info.height = params.height;
        info.rt_count = params.render_target_count;
        DUMP_UNPACKED(FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");

#if PAN_ARCH <= 5
        const void *t = pan_section_ptr(fb, FRAMEBUFFER, TILER);
        const void *w = pan_section_ptr(fb, FRAMEBUFFER, TILER_WEIGHTS);
        pandecode_midgard_tiler_descriptor(t, w);
#endif

        pandecode_indent--;
        pandecode_log("\n");

        gpu_va += pan_size(FRAMEBUFFER);

        info.has_extra = params.has_zs_crc_extension;

        if (info.has_extra) {
                struct pandecode_mapped_memory *mem =
                        pandecode_find_mapped_gpu_mem_containing(gpu_va);
                const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR(zs_crc, mem, (mali_ptr)gpu_va);
                DUMP_CL(ZS_CRC_EXTENSION, zs_crc, "ZS CRC Extension:\n");
                pandecode_log("\n");

                gpu_va += pan_size(ZS_CRC_EXTENSION);
        }

        if (is_fragment)
                pandecode_render_target(gpu_va, job_no, gpu_id, &params);

        return info;
}
#endif

static void
pandecode_attributes(const struct pandecode_mapped_memory *mem,
                            mali_ptr addr, int job_no, char *suffix,
                            int count, bool varying, enum mali_job_type job_type)
{
        char *prefix = varying ? "Varying" : "Attribute";
        assert(addr);

        if (!count) {
                pandecode_msg("warn: No %s records\n", prefix);
                return;
        }

        MAP_ADDR(ATTRIBUTE_BUFFER, addr, cl);

        for (int i = 0; i < count; ++i) {
                pan_unpack(cl + i * pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER, temp);
                DUMP_UNPACKED(ATTRIBUTE_BUFFER, temp, "%s:\n", prefix);

                switch (temp.type) {
                case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR_WRITE_REDUCTION:
                case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR: {
                        pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER),
                                   ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2);
                        pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_NPOT,
                                  temp2, (pandecode_indent + 1) * 2);
                        i++;
                        break;
                }
                case MALI_ATTRIBUTE_TYPE_3D_LINEAR:
                case MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED: {
                        pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER_CONTINUATION_3D),
                                   ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2);
                        pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_3D,
                                  temp2, (pandecode_indent + 1) * 2);
                        i++;
                        break;
                }
                default:
                        break;
                }
        }
        pandecode_log("\n");
}

#if PAN_ARCH >= 6
/* Decodes a Bifrost blend constant. See the notes in bifrost_blend_rt */

static mali_ptr
pandecode_bifrost_blend(void *descs, int job_no, int rt_no, mali_ptr frag_shader)
{
        pan_unpack(descs + (rt_no * pan_size(BLEND)), BLEND, b);
        DUMP_UNPACKED(BLEND, b, "Blend RT %d:\n", rt_no);
        if (b.internal.mode != MALI_BLEND_MODE_SHADER)
                return 0;

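        /* The descriptor stores only the blend shader's low PC bits; the
         * decoder assumes the blend shader lives in the same 4 GB region as
         * the fragment shader, whose upper 32 bits are borrowed to rebuild a
         * full address. */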
        return (frag_shader & 0xFFFFFFFF00000000ULL) | b.internal.shader.pc;
}
#elif PAN_ARCH == 5
static mali_ptr
pandecode_midgard_blend_mrt(void *descs, int job_no, int rt_no)
{
        pan_unpack(descs + (rt_no * pan_size(BLEND)), BLEND, b);
        DUMP_UNPACKED(BLEND, b, "Blend RT %d:\n", rt_no);
        return b.blend_shader ? (b.shader_pc & ~0xf) : 0;
}
#endif

static unsigned
pandecode_attribute_meta(int count, mali_ptr attribute, bool varying)
{
        unsigned max = 0;

        for (int i = 0; i < count; ++i, attribute += pan_size(ATTRIBUTE)) {
                MAP_ADDR(ATTRIBUTE, attribute, cl);
                pan_unpack(cl, ATTRIBUTE, a);
                DUMP_UNPACKED(ATTRIBUTE, a, "%s:\n", varying ? "Varying" : "Attribute");
                max = MAX2(max, a.buffer_index);
        }

        pandecode_log("\n");
        return MIN2(max + 1, 256);
}

/* return bits [lo, hi) of word */
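/* e.g. bits(0xABCD, 4, 12) == 0xBC: shift out the low 4 bits, then mask off
 * all but the requested 8 */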
static u32
bits(u32 word, u32 lo, u32 hi)
{
        if (hi - lo >= 32)
                return word; // avoid undefined behavior with the shift

        if (lo >= 32)
                return 0;

        return (word >> lo) & ((1u << (hi - lo)) - 1);
}

static void
pandecode_invocation(const void *i)
{
        /* Decode invocation_count. See the comment before the definition of
         * invocation_count for an explanation.
         */
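        /* Illustration with made-up shifts: if size_y_shift = 5, size_z_shift
         * = 10 and workgroups_x_shift = 15, then bits [0, 5) of invocations
         * hold size_x - 1, bits [5, 10) hold size_y - 1, and so on; every
         * field is stored biased by one. */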
        pan_unpack(i, INVOCATION, invocation);

        unsigned size_x = bits(invocation.invocations, 0, invocation.size_y_shift) + 1;
        unsigned size_y = bits(invocation.invocations, invocation.size_y_shift, invocation.size_z_shift) + 1;
        unsigned size_z = bits(invocation.invocations, invocation.size_z_shift, invocation.workgroups_x_shift) + 1;

        unsigned groups_x = bits(invocation.invocations, invocation.workgroups_x_shift, invocation.workgroups_y_shift) + 1;
        unsigned groups_y = bits(invocation.invocations, invocation.workgroups_y_shift, invocation.workgroups_z_shift) + 1;
        unsigned groups_z = bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1;

        pandecode_log("Invocation (%d, %d, %d) x (%d, %d, %d)\n",
                      size_x, size_y, size_z,
                      groups_x, groups_y, groups_z);

        DUMP_UNPACKED(INVOCATION, invocation, "Invocation:\n")
}

static void
pandecode_primitive(const void *p)
{
        pan_unpack(p, PRIMITIVE, primitive);
        DUMP_UNPACKED(PRIMITIVE, primitive, "Primitive:\n");

        /* Validate an index buffer is present if we need one. TODO: verify
         * relationship between invocation_count and index_count */

        if (primitive.indices) {
                /* Grab the size */
                unsigned size = (primitive.index_type == MALI_INDEX_TYPE_UINT32) ?
                        sizeof(uint32_t) : primitive.index_type;
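
                /* This works because the index type enum is assumed to encode
                 * the 8- and 16-bit types with their byte sizes; only UINT32
                 * needs the explicit case above. */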

                /* Ensure we got a size, and if so, validate the index buffer
                 * is large enough to hold a full set of indices of the given
                 * size */

                if (!size)
                        pandecode_msg("XXX: index size missing\n");
                else
                        pandecode_validate_buffer(primitive.indices, primitive.index_count * size);
        } else if (primitive.index_type)
                pandecode_msg("XXX: index type set but no index buffer\n");
}

static void
pandecode_uniform_buffers(mali_ptr pubufs, int ubufs_count, int job_no)
{
        struct pandecode_mapped_memory *umem = pandecode_find_mapped_gpu_mem_containing(pubufs);
        uint64_t *PANDECODE_PTR_VAR(ubufs, umem, pubufs);

        for (int i = 0; i < ubufs_count; i++) {
                mali_ptr addr = (ubufs[i] >> 10) << 2;
                unsigned size = addr ? (((ubufs[i] & ((1 << 10) - 1)) + 1) * 16) : 0;
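
                /* Worked example with a made-up descriptor word: 0x4003 has
                 * entry-count bits 0x3, i.e. (0x3 + 1) * 16 = 64 bytes, and
                 * address (0x4003 >> 10) << 2 = 0x40. */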

                pandecode_validate_buffer(addr, size);

                char *ptr = pointer_as_memory_reference(addr);
                pandecode_log("ubuf_%d[%u] = %s;\n", i, size, ptr);
                free(ptr);
        }

        pandecode_log("\n");
}

static void
pandecode_uniforms(mali_ptr uniforms, unsigned uniform_count)
{
        pandecode_validate_buffer(uniforms, uniform_count * 16);

        char *ptr = pointer_as_memory_reference(uniforms);
        pandecode_log("vec4 uniforms[%u] = %s;\n", uniform_count, ptr);
        free(ptr);
        pandecode_log("\n");
}

static const char *
shader_type_for_job(unsigned type)
{
        switch (type) {
        case MALI_JOB_TYPE_VERTEX:  return "VERTEX";
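        /* Tiler jobs are reported as FRAGMENT: as far as shader disassembly
         * is concerned, the shader attached to a tiler job is the fragment
         * shader. */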
        case MALI_JOB_TYPE_TILER:   return "FRAGMENT";
        case MALI_JOB_TYPE_FRAGMENT: return "FRAGMENT";
        case MALI_JOB_TYPE_COMPUTE: return "COMPUTE";
        default: return "UNKNOWN";
        }
}

static unsigned shader_id = 0;

static struct midgard_disasm_stats
pandecode_shader_disassemble(mali_ptr shader_ptr, int shader_no, int type,
                             unsigned gpu_id)
{
        struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(shader_ptr);
        uint8_t *PANDECODE_PTR_VAR(code, mem, shader_ptr);

        /* Compute maximum possible size */
        size_t sz = mem->length - (shader_ptr - mem->gpu_va);

        /* Print some boilerplate to clearly denote the assembly (which doesn't
         * obey indentation rules), and actually do the disassembly! */

        pandecode_log_cont("\n\n");

        struct midgard_disasm_stats stats;

#if PAN_ARCH >= 6
        disassemble_bifrost(pandecode_dump_stream, code, sz, true);

        /* TODO: Extend stats to Bifrost */
        stats.texture_count = -128;
        stats.sampler_count = -128;
        stats.attribute_count = -128;
        stats.varying_count = -128;
        stats.uniform_count = -128;
        stats.uniform_buffer_count = -128;
        stats.work_count = -128;

        stats.instruction_count = 0;
        stats.bundle_count = 0;
        stats.quadword_count = 0;
        stats.helper_invocations = false;
#else
        stats = disassemble_midgard(pandecode_dump_stream,
                                    code, sz, gpu_id, true);
#endif

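        /* Register-pressure heuristic: up to 4 work registers lets the core
         * run 4 threads, up to 8 halves that to 2, and anything more drops
         * to a single thread. */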
        unsigned nr_threads =
                (stats.work_count <= 4) ? 4 :
                (stats.work_count <= 8) ? 2 :
                1;

        pandecode_log_cont("shader%d - MESA_SHADER_%s shader: "
                "%u inst, %u bundles, %u quadwords, "
                "%u registers, %u threads, 0 loops, 0:0 spills:fills\n\n\n",
                shader_id++,
                shader_type_for_job(type),
                stats.instruction_count, stats.bundle_count, stats.quadword_count,
                stats.work_count, nr_threads);

        return stats;
}

static void
pandecode_texture_payload(mali_ptr payload,
                          enum mali_texture_dimension dim,
                          enum mali_texture_layout layout,
                          bool manual_stride,
                          uint8_t levels,
                          uint16_t nr_samples,
                          uint16_t array_size,
                          struct pandecode_mapped_memory *tmem)
{
        pandecode_log(".payload = {\n");
        pandecode_indent++;

        /* A bunch of bitmap pointers follow. We work out the correct number
         * based on the mipmap/cubemap properties, but dump extra
         * possibilities to futureproof */

        int bitmap_count = levels;
        /* Miptree for each face */
        if (dim == MALI_TEXTURE_DIMENSION_CUBE)
                bitmap_count *= 6;

        /* One surface per sample */
        bitmap_count *= nr_samples;

        /* Array of layers */
        bitmap_count *= array_size;

        /* Stride for each element */
        if (manual_stride)
                bitmap_count *= 2;
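
        /* Example: a cubemap with 4 levels, 1 sample, array_size 1 and manual
         * strides has 4 * 6 * 2 = 48 payload words: one pointer and one
         * packed stride word per surface. */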

        mali_ptr *pointers_and_strides = pandecode_fetch_gpu_mem(tmem,
                payload, sizeof(mali_ptr) * bitmap_count);
        for (int i = 0; i < bitmap_count; ++i) {
                /* How we dump depends if this is a stride or a pointer */

                if (manual_stride && (i & 1)) {
                        /* signed 32-bit snuck in as a 64-bit pointer */
                        uint64_t stride_set = pointers_and_strides[i];
                        int32_t line_stride = stride_set;
                        int32_t surface_stride = stride_set >> 32;
                        pandecode_log("(mali_ptr) %d /* surface stride */ %d /* line stride */, \n",
                                      surface_stride, line_stride);
                } else {
                        char *a = pointer_as_memory_reference(pointers_and_strides[i]);
                        pandecode_log("%s, \n", a);
                        free(a);
                }
        }

        pandecode_indent--;
        pandecode_log("},\n");
}

#if PAN_ARCH <= 5
static void
pandecode_texture(mali_ptr u,
                struct pandecode_mapped_memory *tmem,
                unsigned job_no, unsigned tex)
{
        struct pandecode_mapped_memory *mapped_mem = pandecode_find_mapped_gpu_mem_containing(u);
        const uint8_t *cl = pandecode_fetch_gpu_mem(mapped_mem, u, pan_size(TEXTURE));

        pan_unpack(cl, TEXTURE, temp);
        DUMP_UNPACKED(TEXTURE, temp, "Texture:\n")

        pandecode_indent++;
        unsigned nr_samples = temp.dimension == MALI_TEXTURE_DIMENSION_3D ?
                              1 : temp.sample_count;
        pandecode_texture_payload(u + pan_size(TEXTURE),
                        temp.dimension, temp.texel_ordering, temp.manual_stride,
                        temp.levels, nr_samples, temp.array_size, mapped_mem);
        pandecode_indent--;
}
#else
static void
pandecode_bifrost_texture(
                const void *cl,
                unsigned job_no,
                unsigned tex)
{
        pan_unpack(cl, TEXTURE, temp);
        DUMP_UNPACKED(TEXTURE, temp, "Texture:\n")

        struct pandecode_mapped_memory *tmem = pandecode_find_mapped_gpu_mem_containing(temp.surfaces);
        unsigned nr_samples = temp.dimension == MALI_TEXTURE_DIMENSION_3D ?
                              1 : temp.sample_count;
        pandecode_indent++;
        pandecode_texture_payload(temp.surfaces, temp.dimension, temp.texel_ordering,
                                  true, temp.levels, nr_samples, temp.array_size, tmem);
        pandecode_indent--;
}
#endif

static void
pandecode_blend_shader_disassemble(mali_ptr shader, int job_no, int job_type,
                                   unsigned gpu_id)
{
        struct midgard_disasm_stats stats =
                pandecode_shader_disassemble(shader, job_no, job_type, gpu_id);

        bool has_texture = (stats.texture_count > 0);
        bool has_sampler = (stats.sampler_count > 0);
        bool has_attribute = (stats.attribute_count > 0);
        bool has_varying = (stats.varying_count > 0);
        bool has_uniform = (stats.uniform_count > 0);
        bool has_ubo = (stats.uniform_buffer_count > 0);

        if (has_texture || has_sampler)
                pandecode_msg("XXX: blend shader accessing textures\n");

        if (has_attribute || has_varying)
                pandecode_msg("XXX: blend shader accessing interstage\n");

        if (has_uniform || has_ubo)
                pandecode_msg("XXX: blend shader accessing uniforms\n");
}

static void
pandecode_textures(mali_ptr textures, unsigned texture_count, int job_no)
{
        struct pandecode_mapped_memory *mmem = pandecode_find_mapped_gpu_mem_containing(textures);

        if (!mmem)
                return;

        pandecode_log("Textures %"PRIx64"_%d:\n", textures, job_no);
        pandecode_indent++;

#if PAN_ARCH >= 6
        const void *cl =
                pandecode_fetch_gpu_mem(mmem,
                                        textures,
                                        pan_size(TEXTURE) *
                                        texture_count);

        for (unsigned tex = 0; tex < texture_count; ++tex) {
                pandecode_bifrost_texture(cl + pan_size(TEXTURE) * tex,
                                          job_no, tex);
        }
#else
        mali_ptr *PANDECODE_PTR_VAR(u, mmem, textures);

        for (int tex = 0; tex < texture_count; ++tex) {
                mali_ptr *PANDECODE_PTR_VAR(u, mmem, textures + tex * sizeof(mali_ptr));
                char *a = pointer_as_memory_reference(*u);
                pandecode_log("%s,\n", a);
                free(a);
        }

        /* Now, finally, descend down into the texture descriptor */
        for (unsigned tex = 0; tex < texture_count; ++tex) {
                mali_ptr *PANDECODE_PTR_VAR(u, mmem, textures + tex * sizeof(mali_ptr));
                struct pandecode_mapped_memory *tmem = pandecode_find_mapped_gpu_mem_containing(*u);
                if (tmem)
                        pandecode_texture(*u, tmem, job_no, tex);
        }
#endif
        pandecode_indent--;
        pandecode_log("\n");
}

static void
pandecode_samplers(mali_ptr samplers, unsigned sampler_count, int job_no)
{
        pandecode_log("Samplers %"PRIx64"_%d:\n", samplers, job_no);
        pandecode_indent++;

        for (int i = 0; i < sampler_count; ++i)
                DUMP_ADDR(SAMPLER, samplers + (pan_size(SAMPLER) * i), "Sampler %d:\n", i);

        pandecode_indent--;
        pandecode_log("\n");
}

static void
pandecode_dcd(const struct MALI_DRAW *p,
              int job_no, enum mali_job_type job_type,
              char *suffix, unsigned gpu_id)
{
        struct pandecode_mapped_memory *attr_mem;

#if PAN_ARCH >= 5
        struct pandecode_fbd fbd_info = {
                /* Default for Bifrost */
                .rt_count = 1
        };
#endif

#if PAN_ARCH >= 6
        pandecode_local_storage(p->thread_storage & ~1, job_no);
#elif PAN_ARCH == 5
        if (job_type != MALI_JOB_TYPE_TILER) {
                pandecode_local_storage(p->thread_storage & ~1, job_no);
        } else {
                assert(p->fbd & MALI_FBD_TAG_IS_MFBD);
                fbd_info = pandecode_mfbd_bfr((u64) ((uintptr_t) p->fbd) & ~MALI_FBD_TAG_MASK,
                                              job_no, false, gpu_id);
        }
#else
        pandecode_sfbd((u64) (uintptr_t) p->fbd, job_no, false, gpu_id);
#endif

        int varying_count = 0, attribute_count = 0, uniform_count = 0, uniform_buffer_count = 0;
        int texture_count = 0, sampler_count = 0;

        if (p->state) {
                struct pandecode_mapped_memory *smem = pandecode_find_mapped_gpu_mem_containing(p->state);
                uint32_t *cl = pandecode_fetch_gpu_mem(smem, p->state, pan_size(RENDERER_STATE));

                pan_unpack(cl, RENDERER_STATE, state);

                if (state.shader.shader & ~0xF)
                        pandecode_shader_disassemble(state.shader.shader & ~0xF, job_no, job_type, gpu_id);

#if PAN_ARCH >= 6
                bool idvs = (job_type == MALI_JOB_TYPE_INDEXED_VERTEX);

                if (idvs && state.secondary_shader)
                        pandecode_shader_disassemble(state.secondary_shader, job_no, job_type, gpu_id);
#endif
                DUMP_UNPACKED(RENDERER_STATE, state, "State:\n");
                pandecode_indent++;

                /* Save for dumps */
                attribute_count = state.shader.attribute_count;
                varying_count = state.shader.varying_count;
                texture_count = state.shader.texture_count;
                sampler_count = state.shader.sampler_count;
                uniform_buffer_count = state.properties.uniform_buffer_count;

#if PAN_ARCH >= 6
                uniform_count = state.preload.uniform_count;
#else
                uniform_count = state.properties.uniform_count;
#endif

#if PAN_ARCH >= 6
                DUMP_UNPACKED(PRELOAD, state.preload, "Preload:\n");
#elif PAN_ARCH == 4
                mali_ptr shader = state.blend_shader & ~0xF;
                if (state.multisample_misc.blend_shader && shader)
                        pandecode_blend_shader_disassemble(shader, job_no, job_type, gpu_id);
#endif
                pandecode_indent--;
                pandecode_log("\n");

                /* MRT blend fields are used whenever MFBD is used, with
                 * per-RT descriptors */

#if PAN_ARCH >= 5
                if ((job_type == MALI_JOB_TYPE_TILER || job_type == MALI_JOB_TYPE_FRAGMENT) &&
                    (PAN_ARCH >= 6 || p->thread_storage & MALI_FBD_TAG_IS_MFBD)) {
                        void* blend_base = ((void *) cl) + pan_size(RENDERER_STATE);

                        for (unsigned i = 0; i < fbd_info.rt_count; i++) {
                                mali_ptr shader = 0;

#if PAN_ARCH >= 6
                                shader = pandecode_bifrost_blend(blend_base, job_no, i,
                                                                 state.shader.shader);
#else
                                shader = pandecode_midgard_blend_mrt(blend_base, job_no, i);
#endif
                                if (shader & ~0xF)
                                        pandecode_blend_shader_disassemble(shader, job_no, job_type,
                                                                           gpu_id);
                        }
                }
#endif
        } else
                pandecode_msg("XXX: missing shader descriptor\n");

        if (p->viewport) {
                DUMP_ADDR(VIEWPORT, p->viewport, "Viewport:\n");
                pandecode_log("\n");
        }

        unsigned max_attr_index = 0;

        if (p->attributes)
                max_attr_index = pandecode_attribute_meta(attribute_count, p->attributes, false);

        if (p->attribute_buffers) {
                attr_mem = pandecode_find_mapped_gpu_mem_containing(p->attribute_buffers);
                pandecode_attributes(attr_mem, p->attribute_buffers, job_no, suffix, max_attr_index, false, job_type);
        }

        if (p->varyings) {
                varying_count = pandecode_attribute_meta(varying_count, p->varyings, true);
        }

        if (p->varying_buffers) {
                attr_mem = pandecode_find_mapped_gpu_mem_containing(p->varying_buffers);
                pandecode_attributes(attr_mem, p->varying_buffers, job_no, suffix, varying_count, true, job_type);
        }

        if (p->uniform_buffers) {
                if (uniform_buffer_count)
                        pandecode_uniform_buffers(p->uniform_buffers, uniform_buffer_count, job_no);
                else
                        pandecode_msg("warn: UBOs specified but not referenced\n");
        } else if (uniform_buffer_count)
                pandecode_msg("XXX: UBOs referenced but not specified\n");

        /* We don't want to actually dump uniforms, but we do need to validate
         * that the counts we were given are sane */

        if (p->push_uniforms) {
                if (uniform_count)
                        pandecode_uniforms(p->push_uniforms, uniform_count);
                else
                        pandecode_msg("warn: Uniforms specified but not referenced\n");
        } else if (uniform_count)
                pandecode_msg("XXX: Uniforms referenced but not specified\n");

        if (p->textures)
                pandecode_textures(p->textures, texture_count, job_no);

        if (p->samplers)
                pandecode_samplers(p->samplers, sampler_count, job_no);
}

static void
pandecode_primitive_size(const void *s, bool constant)
{
        pan_unpack(s, PRIMITIVE_SIZE, ps);
        if (ps.size_array == 0x0)
                return;

        DUMP_UNPACKED(PRIMITIVE_SIZE, ps, "Primitive Size:\n")
}

static void
pandecode_vertex_compute_geometry_job(const struct MALI_JOB_HEADER *h,
                                      const struct pandecode_mapped_memory *mem,
                                      mali_ptr job, int job_no, unsigned gpu_id)
{
        struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, mem, job);
        pan_section_unpack(p, COMPUTE_JOB, DRAW, draw);
        pandecode_dcd(&draw, job_no, h->type, "", gpu_id);

        pandecode_log("Vertex Job Payload:\n");
        pandecode_indent++;
        pandecode_invocation(pan_section_ptr(p, COMPUTE_JOB, INVOCATION));
        DUMP_SECTION(COMPUTE_JOB, PARAMETERS, p, "Vertex Job Parameters:\n");
        DUMP_UNPACKED(DRAW, draw, "Draw:\n");
        pandecode_indent--;
        pandecode_log("\n");
}

#if PAN_ARCH >= 6
static void
pandecode_bifrost_tiler_heap(mali_ptr gpu_va, int job_no)
{
        struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
        pan_unpack(PANDECODE_PTR(mem, gpu_va, void), TILER_HEAP, h);
        DUMP_UNPACKED(TILER_HEAP, h, "Bifrost Tiler Heap:\n");
}

static void
pandecode_bifrost_tiler(mali_ptr gpu_va, int job_no)
{
        struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
        pan_unpack(PANDECODE_PTR(mem, gpu_va, void), TILER_CONTEXT, t);

        pandecode_bifrost_tiler_heap(t.heap, job_no);

        DUMP_UNPACKED(TILER_CONTEXT, t, "Bifrost Tiler:\n");
        pandecode_indent++;
        if (t.hierarchy_mask != 0xa &&
            t.hierarchy_mask != 0x14 &&
            t.hierarchy_mask != 0x28 &&
            t.hierarchy_mask != 0x50 &&
            t.hierarchy_mask != 0xa0)
                pandecode_msg("XXX: Unexpected hierarchy_mask (not 0xa, 0x14, 0x28, 0x50 or 0xa0)!\n");

        pandecode_indent--;
}

static void
pandecode_indexed_vertex_job(const struct MALI_JOB_HEADER *h,
                             const struct pandecode_mapped_memory *mem,
                             mali_ptr job, int job_no, unsigned gpu_id)
{
        struct mali_indexed_vertex_job_packed *PANDECODE_PTR_VAR(p, mem, job);

        pandecode_log("Vertex:\n");
        pan_section_unpack(p, INDEXED_VERTEX_JOB, VERTEX_DRAW, vert_draw);
        pandecode_dcd(&vert_draw, job_no, h->type, "", gpu_id);
        DUMP_UNPACKED(DRAW, vert_draw, "Vertex Draw:\n");

        pandecode_log("Fragment:\n");
        pan_section_unpack(p, INDEXED_VERTEX_JOB, FRAGMENT_DRAW, frag_draw);
        pandecode_dcd(&frag_draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
        DUMP_UNPACKED(DRAW, frag_draw, "Fragment Draw:\n");

        pan_section_unpack(p, INDEXED_VERTEX_JOB, TILER, tiler_ptr);
        pandecode_log("Tiler Job Payload:\n");
        pandecode_indent++;
        pandecode_bifrost_tiler(tiler_ptr.address, job_no);
        pandecode_indent--;

        pandecode_invocation(pan_section_ptr(p, INDEXED_VERTEX_JOB, INVOCATION));
        pandecode_primitive(pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE));

        /* TODO: gl_PointSize on Bifrost */
        pandecode_primitive_size(pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE_SIZE), true);

        pan_section_unpack(p, INDEXED_VERTEX_JOB, PADDING, padding);
}

static void
pandecode_tiler_job_bfr(const struct MALI_JOB_HEADER *h,
                        const struct pandecode_mapped_memory *mem,
                        mali_ptr job, int job_no, unsigned gpu_id)
{
        struct mali_tiler_job_packed *PANDECODE_PTR_VAR(p, mem, job);
        pan_section_unpack(p, TILER_JOB, DRAW, draw);
        pan_section_unpack(p, TILER_JOB, TILER, tiler_ptr);
        pandecode_dcd(&draw, job_no, h->type, "", gpu_id);

        pandecode_log("Tiler Job Payload:\n");
        pandecode_indent++;
        pandecode_bifrost_tiler(tiler_ptr.address, job_no);

        pandecode_invocation(pan_section_ptr(p, TILER_JOB, INVOCATION));
        pandecode_primitive(pan_section_ptr(p, TILER_JOB, PRIMITIVE));

        /* TODO: gl_PointSize on Bifrost */
        pandecode_primitive_size(pan_section_ptr(p, TILER_JOB, PRIMITIVE_SIZE), true);
        pan_section_unpack(p, TILER_JOB, PADDING, padding);
        DUMP_UNPACKED(DRAW, draw, "Draw:\n");
        pandecode_indent--;
        pandecode_log("\n");
}
#else
static void
pandecode_tiler_job_mdg(const struct MALI_JOB_HEADER *h,
                        const struct pandecode_mapped_memory *mem,
                        mali_ptr job, int job_no, unsigned gpu_id)
{
        struct mali_tiler_job_packed *PANDECODE_PTR_VAR(p, mem, job);
        pan_section_unpack(p, TILER_JOB, DRAW, draw);
        pandecode_dcd(&draw, job_no, h->type, "", gpu_id);

        pandecode_log("Tiler Job Payload:\n");
        pandecode_indent++;
        pandecode_invocation(pan_section_ptr(p, TILER_JOB, INVOCATION));
        pandecode_primitive(pan_section_ptr(p, TILER_JOB, PRIMITIVE));
        DUMP_UNPACKED(DRAW, draw, "Draw:\n");

        pan_section_unpack(p, TILER_JOB, PRIMITIVE, primitive);
        pandecode_primitive_size(pan_section_ptr(p, TILER_JOB, PRIMITIVE_SIZE),
                                 primitive.point_size_array_format == MALI_POINT_SIZE_ARRAY_FORMAT_NONE);
        pandecode_indent--;
        pandecode_log("\n");
}
#endif

static void
pandecode_fragment_job(const struct pandecode_mapped_memory *mem,
                       mali_ptr job, int job_no, unsigned gpu_id)
{
        struct mali_fragment_job_packed *PANDECODE_PTR_VAR(p, mem, job);
        pan_section_unpack(p, FRAGMENT_JOB, PAYLOAD, s);

#if PAN_ARCH == 4
        pandecode_sfbd(s.framebuffer, job_no, true, gpu_id);
#else
        assert(s.framebuffer & MALI_FBD_TAG_IS_MFBD);

        struct pandecode_fbd info =
                pandecode_mfbd_bfr(s.framebuffer & ~MALI_FBD_TAG_MASK, job_no,
                                   true, gpu_id);
#endif

#if PAN_ARCH >= 5
        unsigned expected_tag = 0;

        /* Compute the tag for the tagged pointer. This contains the type of
         * FBD (MFBD/SFBD), and in the case of an MFBD, information about which
         * additional structures follow the MFBD header (an extra payload or
         * not, as well as a count of render targets) */

        expected_tag = MALI_FBD_TAG_IS_MFBD;
        if (info.has_extra)
                expected_tag |= MALI_FBD_TAG_HAS_ZS_RT;

        expected_tag |= (MALI_POSITIVE(info.rt_count) << 2);
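
        /* e.g. an MFBD with a ZS/CRC extension and 4 render targets is
         * expected to carry the tag IS_MFBD | HAS_ZS_RT | (3 << 2), since
         * MALI_POSITIVE stores n - 1. */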
#endif

        DUMP_UNPACKED(FRAGMENT_JOB_PAYLOAD, s, "Fragment Job Payload:\n");

#if PAN_ARCH >= 5
        /* The FBD is a tagged pointer */

        unsigned tag = (s.framebuffer & MALI_FBD_TAG_MASK);

        if (tag != expected_tag)
                pandecode_msg("XXX: expected FBD tag %X but got %X\n", expected_tag, tag);
#endif

        pandecode_log("\n");
}

static void
pandecode_write_value_job(const struct pandecode_mapped_memory *mem,
                          mali_ptr job, int job_no)
{
        struct mali_write_value_job_packed *PANDECODE_PTR_VAR(p, mem, job);
        pan_section_unpack(p, WRITE_VALUE_JOB, PAYLOAD, u);
        DUMP_SECTION(WRITE_VALUE_JOB, PAYLOAD, p, "Write Value Payload:\n");
        pandecode_log("\n");
}

static void
pandecode_cache_flush_job(const struct pandecode_mapped_memory *mem,
                          mali_ptr job, int job_no)
{
        struct mali_cache_flush_job_packed *PANDECODE_PTR_VAR(p, mem, job);
        pan_section_unpack(p, CACHE_FLUSH_JOB, PAYLOAD, u);
        DUMP_SECTION(CACHE_FLUSH_JOB, PAYLOAD, p, "Cache Flush Payload:\n");
        pandecode_log("\n");
}

/* Entrypoint to start tracing. jc_gpu_va is the GPU address for the first job
 * in the chain; later jobs are found by walking the chain. The GPU ID is the
 * finer-grained model ID, since some details are model-specific even within a
 * single architecture. */

void
GENX(pandecode_jc)(mali_ptr jc_gpu_va, unsigned gpu_id)
{
        pandecode_dump_file_open();

        unsigned job_descriptor_number = 0;
        mali_ptr next_job = 0;

        do {
                struct pandecode_mapped_memory *mem =
                        pandecode_find_mapped_gpu_mem_containing(jc_gpu_va);

                pan_unpack(PANDECODE_PTR(mem, jc_gpu_va, struct mali_job_header_packed),
                           JOB_HEADER, h);
                next_job = h.next;

                int job_no = job_descriptor_number++;

                DUMP_UNPACKED(JOB_HEADER, h, "Job Header:\n");
                pandecode_log("\n");

                switch (h.type) {
                case MALI_JOB_TYPE_WRITE_VALUE:
                        pandecode_write_value_job(mem, jc_gpu_va, job_no);
                        break;

                case MALI_JOB_TYPE_CACHE_FLUSH:
                        pandecode_cache_flush_job(mem, jc_gpu_va, job_no);
                        break;

                case MALI_JOB_TYPE_TILER:
#if PAN_ARCH >= 6
                        pandecode_tiler_job_bfr(&h, mem, jc_gpu_va, job_no, gpu_id);
#else
                        pandecode_tiler_job_mdg(&h, mem, jc_gpu_va, job_no, gpu_id);
#endif
                        break;

                case MALI_JOB_TYPE_VERTEX:
                case MALI_JOB_TYPE_COMPUTE:
                        pandecode_vertex_compute_geometry_job(&h, mem, jc_gpu_va, job_no, gpu_id);
                        break;

#if PAN_ARCH >= 6
                case MALI_JOB_TYPE_INDEXED_VERTEX:
                        pandecode_indexed_vertex_job(&h, mem, jc_gpu_va, job_no, gpu_id);
                        break;
#endif

                case MALI_JOB_TYPE_FRAGMENT:
                        pandecode_fragment_job(mem, jc_gpu_va, job_no, gpu_id);
                        break;

                default:
                        break;
                }
        } while ((jc_gpu_va = next_job));

        fflush(pandecode_dump_stream);
        pandecode_map_read_write();
}

void
GENX(pandecode_abort_on_fault)(mali_ptr jc_gpu_va)
{
        mali_ptr next_job = 0;

        do {
                struct pandecode_mapped_memory *mem =
                        pandecode_find_mapped_gpu_mem_containing(jc_gpu_va);

                pan_unpack(PANDECODE_PTR(mem, jc_gpu_va, struct mali_job_header_packed),
                           JOB_HEADER, h);
                next_job = h.next;

                /* Ensure the job is marked COMPLETE */
                if (h.exception_status != 0x1) {
                        fprintf(stderr, "Incomplete job or timeout\n");
                        abort();
                }
        } while ((jc_gpu_va = next_job));

        pandecode_map_read_write();
}