1 /*
2 * Copyright (C) 2017-2019 Alyssa Rosenzweig
3 * Copyright (C) 2017-2019 Connor Abbott
4 * Copyright (C) 2019 Collabora, Ltd.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 */
25
26 #include <genxml/gen_macros.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <memory.h>
30 #include <stdbool.h>
31 #include <stdarg.h>
32 #include <errno.h>
33 #include <ctype.h>
34 #include "decode.h"
35
36 #include "midgard/disassemble.h"
37 #include "bifrost/disassemble.h"
38
/* Descriptor-dumping helpers. NOTE(review): these are brace-block statement
 * macros rather than do { } while (0); several call sites in this file use
 * them without a trailing semicolon, so the brace form must be preserved.
 */

/* Log a printf-style header, then pretty-print the already-unpacked
 * descriptor `var` of genxml type T at one extra indent level. */
#define DUMP_UNPACKED(T, var, ...) { \
        pandecode_log(__VA_ARGS__); \
        pan_print(pandecode_dump_stream, T, var, (pandecode_indent + 1) * 2); \
}

/* Unpack the packed descriptor at CPU pointer `cl` as type T, then dump it. */
#define DUMP_CL(T, cl, ...) {\
        pan_unpack(cl, T, temp); \
        DUMP_UNPACKED(T, temp, __VA_ARGS__); \
}

/* Unpack and pretty-print section S of the aggregate descriptor A at `cl`. */
#define DUMP_SECTION(A, S, cl, ...) { \
        pan_section_unpack(cl, A, S, temp); \
        pandecode_log(__VA_ARGS__); \
        pan_section_print(pandecode_dump_stream, A, S, temp, (pandecode_indent + 1) * 2); \
}

/* Declare `cl` as a CPU pointer for the GPU address `addr`, fetched through
 * the mapped-memory table and sized to cover one descriptor of type T. */
#define MAP_ADDR(T, addr, cl) \
        const uint8_t *cl = 0; \
        { \
                struct pandecode_mapped_memory *mapped_mem = pandecode_find_mapped_gpu_mem_containing(addr); \
                cl = pandecode_fetch_gpu_mem(mapped_mem, addr, pan_size(T)); \
        }

/* Map the descriptor of type T at GPU address `addr` and dump it. */
#define DUMP_ADDR(T, addr, ...) {\
        MAP_ADDR(T, addr, cl) \
        DUMP_CL(T, cl, __VA_ARGS__); \
}
66
/* Semantic logging type.
 *
 * Raw: for raw messages to be printed as is.
 * Message: for helpful information to be commented out in replays.
 *
 * Use one of pandecode_log or pandecode_msg as syntax sugar.
 */

enum pandecode_log_type {
        PANDECODE_RAW,
        PANDECODE_MESSAGE,
};

#define pandecode_log(...) pandecode_log_typed(PANDECODE_RAW, __VA_ARGS__)
#define pandecode_msg(...) pandecode_log_typed(PANDECODE_MESSAGE, __VA_ARGS__)

/* Current nesting depth of the decoded output; each level adds one unit of
 * leading indentation in pandecode_make_indent(). */
static unsigned pandecode_indent = 0;
84
85 static void
pandecode_make_indent(void)86 pandecode_make_indent(void)
87 {
88 for (unsigned i = 0; i < pandecode_indent; ++i)
89 fprintf(pandecode_dump_stream, " ");
90 }
91
92 static void PRINTFLIKE(2, 3)
pandecode_log_typed(enum pandecode_log_type type,const char * format,...)93 pandecode_log_typed(enum pandecode_log_type type, const char *format, ...)
94 {
95 va_list ap;
96
97 pandecode_make_indent();
98
99 if (type == PANDECODE_MESSAGE)
100 fprintf(pandecode_dump_stream, "// ");
101
102 va_start(ap, format);
103 vfprintf(pandecode_dump_stream, format, ap);
104 va_end(ap);
105 }
106
107 static void
pandecode_log_cont(const char * format,...)108 pandecode_log_cont(const char *format, ...)
109 {
110 va_list ap;
111
112 va_start(ap, format);
113 vfprintf(pandecode_dump_stream, format, ap);
114 va_end(ap);
115 }
116
117 /* To check for memory safety issues, validates that the given pointer in GPU
118 * memory is valid, containing at least sz bytes. The goal is to eliminate
119 * GPU-side memory bugs (NULL pointer dereferences, buffer overflows, or buffer
120 * overruns) by statically validating pointers.
121 */
122
123 static void
pandecode_validate_buffer(mali_ptr addr,size_t sz)124 pandecode_validate_buffer(mali_ptr addr, size_t sz)
125 {
126 if (!addr) {
127 pandecode_msg("XXX: null pointer deref");
128 return;
129 }
130
131 /* Find a BO */
132
133 struct pandecode_mapped_memory *bo =
134 pandecode_find_mapped_gpu_mem_containing(addr);
135
136 if (!bo) {
137 pandecode_msg("XXX: invalid memory dereference\n");
138 return;
139 }
140
141 /* Bounds check */
142
143 unsigned offset = addr - bo->gpu_va;
144 unsigned total = offset + sz;
145
146 if (total > bo->length) {
147 pandecode_msg("XXX: buffer overrun. "
148 "Chunk of size %zu at offset %d in buffer of size %zu. "
149 "Overrun by %zu bytes. \n",
150 sz, offset, bo->length, total - bo->length);
151 return;
152 }
153 }
154
155 #if PAN_ARCH <= 5
156 /* Midgard's tiler descriptor is embedded within the
157 * larger FBD */
158
159 static void
pandecode_midgard_tiler_descriptor(const struct mali_tiler_context_packed * tp,const struct mali_tiler_weights_packed * wp)160 pandecode_midgard_tiler_descriptor(
161 const struct mali_tiler_context_packed *tp,
162 const struct mali_tiler_weights_packed *wp)
163 {
164 pan_unpack(tp, TILER_CONTEXT, t);
165 DUMP_UNPACKED(TILER_CONTEXT, t, "Tiler:\n");
166
167 /* We've never seen weights used in practice, but they exist */
168 pan_unpack(wp, TILER_WEIGHTS, w);
169 bool nonzero_weights = false;
170
171 nonzero_weights |= w.weight0 != 0x0;
172 nonzero_weights |= w.weight1 != 0x0;
173 nonzero_weights |= w.weight2 != 0x0;
174 nonzero_weights |= w.weight3 != 0x0;
175 nonzero_weights |= w.weight4 != 0x0;
176 nonzero_weights |= w.weight5 != 0x0;
177 nonzero_weights |= w.weight6 != 0x0;
178 nonzero_weights |= w.weight7 != 0x0;
179
180 if (nonzero_weights)
181 DUMP_UNPACKED(TILER_WEIGHTS, w, "Tiler Weights:\n");
182 }
183 #endif
184
/* Information about the framebuffer passed back for
 * additional analysis */

struct pandecode_fbd {
        unsigned width;    /* framebuffer width */
        unsigned height;   /* framebuffer height */
        unsigned rt_count; /* number of render targets */
        bool has_extra;    /* whether a ZS/CRC extension descriptor follows */
};
194
#if PAN_ARCH == 4
/* Decode a single-target framebuffer descriptor (SFBD). Returns summary
 * info for the caller; only rt_count/has_extra are populated explicitly
 * (the remaining fields are zero-initialized by the designated initializer).
 * NOTE(review): job_no, is_fragment and gpu_id are unused here — apparently
 * kept for signature parity with the MFBD path; confirm before removing. */
static struct pandecode_fbd
pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
{
        struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
        const void *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);

        /* An SFBD always has exactly one render target and no extension */
        struct pandecode_fbd info = {
                .has_extra = false,
                .rt_count = 1
        };

        pandecode_log("Framebuffer:\n");
        pandecode_indent++;

        DUMP_SECTION(FRAMEBUFFER, LOCAL_STORAGE, s, "Local Storage:\n");
        pan_section_unpack(s, FRAMEBUFFER, PARAMETERS, p);
        DUMP_UNPACKED(FRAMEBUFFER_PARAMETERS, p, "Parameters:\n");

        /* The tiler descriptor and its weights are embedded in the FBD */
        const void *t = pan_section_ptr(s, FRAMEBUFFER, TILER);
        const void *w = pan_section_ptr(s, FRAMEBUFFER, TILER_WEIGHTS);

        pandecode_midgard_tiler_descriptor(t, w);

        pandecode_indent--;

        /* Dummy unpack of the padding section to make sure all words are 0.
         * No need to call print here since the section is supposed to be empty.
         */
        pan_section_unpack(s, FRAMEBUFFER, PADDING_1, padding1);
        pan_section_unpack(s, FRAMEBUFFER, PADDING_2, padding2);
        pandecode_log("\n");

        return info;
}
#endif
231
232 #if PAN_ARCH >= 5
233 static void
pandecode_local_storage(uint64_t gpu_va,int job_no)234 pandecode_local_storage(uint64_t gpu_va, int job_no)
235 {
236 struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
237 const struct mali_local_storage_packed *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);
238 DUMP_CL(LOCAL_STORAGE, s, "Local Storage:\n");
239 }
240
241 static void
pandecode_render_target(uint64_t gpu_va,unsigned job_no,unsigned gpu_id,const struct MALI_FRAMEBUFFER_PARAMETERS * fb)242 pandecode_render_target(uint64_t gpu_va, unsigned job_no, unsigned gpu_id,
243 const struct MALI_FRAMEBUFFER_PARAMETERS *fb)
244 {
245 pandecode_log("Color Render Targets:\n");
246 pandecode_indent++;
247
248 for (int i = 0; i < (fb->render_target_count); i++) {
249 mali_ptr rt_va = gpu_va + i * pan_size(RENDER_TARGET);
250 struct pandecode_mapped_memory *mem =
251 pandecode_find_mapped_gpu_mem_containing(rt_va);
252 const struct mali_render_target_packed *PANDECODE_PTR_VAR(rtp, mem, (mali_ptr) rt_va);
253 DUMP_CL(RENDER_TARGET, rtp, "Color Render Target %d:\n", i);
254 }
255
256 pandecode_indent--;
257 pandecode_log("\n");
258 }
259 #endif
260
#if PAN_ARCH >= 6
/* Dump the sample-location table referenced by the framebuffer parameters.
 * Entries are u16 (x, y) pairs stored with a +128 bias, printed as signed
 * offsets. NOTE(review): the fixed count of 33 pairs is taken from this
 * loop bound — confirm against the hardware/genxml spec before changing. */
static void
pandecode_sample_locations(const void *fb, int job_no)
{
        pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);

        struct pandecode_mapped_memory *smem =
                pandecode_find_mapped_gpu_mem_containing(params.sample_locations);

        const u16 *PANDECODE_PTR_VAR(samples, smem, params.sample_locations);

        pandecode_log("Sample locations:\n");
        for (int i = 0; i < 33; i++) {
                /* Remove the +128 bias before printing */
                pandecode_log(" (%d, %d),\n",
                              samples[2 * i] - 128,
                              samples[2 * i + 1] - 128);
        }
}
#endif
280
281 static void
282 pandecode_dcd(const struct MALI_DRAW *p,
283 int job_no, enum mali_job_type job_type,
284 char *suffix, unsigned gpu_id);
285
#if PAN_ARCH >= 5
/* Decode a multi-target framebuffer descriptor (MFBD). On Bifrost (>= v6)
 * this also decodes the sample-location table and any enabled pre/post frame
 * shader DCDs; on Midgard (v5) the embedded tiler descriptor is decoded
 * instead. Returns summary info (dimensions, RT count, ZS/CRC presence)
 * for the caller's further analysis. */
static struct pandecode_fbd
pandecode_mfbd_bfr(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
{
        struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
        const void *PANDECODE_PTR_VAR(fb, mem, (mali_ptr) gpu_va);
        pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);

        struct pandecode_fbd info;

#if PAN_ARCH >= 6
        pandecode_sample_locations(fb, job_no);

        /* Decode the pre-frame 0/1 and post-frame shader DCDs whenever their
         * execution mode is anything but NEVER. The three DCDs are laid out
         * consecutively at frame_shader_dcds, one pan_size(DRAW) apart. */
        pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, bparams);
        unsigned dcd_size = pan_size(DRAW);
        struct pandecode_mapped_memory *dcdmem =
                pandecode_find_mapped_gpu_mem_containing(bparams.frame_shader_dcds);

        if (bparams.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
                const void *PANDECODE_PTR_VAR(dcd, dcdmem, bparams.frame_shader_dcds + (0 * dcd_size));
                pan_unpack(dcd, DRAW, draw);
                pandecode_log("Pre frame 0:\n");
                pandecode_dcd(&draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
        }

        if (bparams.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
                const void *PANDECODE_PTR_VAR(dcd, dcdmem, bparams.frame_shader_dcds + (1 * dcd_size));
                pan_unpack(dcd, DRAW, draw);
                pandecode_log("Pre frame 1:\n");
                pandecode_dcd(&draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
        }

        if (bparams.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
                const void *PANDECODE_PTR_VAR(dcd, dcdmem, bparams.frame_shader_dcds + (2 * dcd_size));
                pan_unpack(dcd, DRAW, draw);
                pandecode_log("Post frame:\n");
                pandecode_dcd(&draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
        }
#endif

        pandecode_log("Multi-Target Framebuffer:\n");
        pandecode_indent++;

#if PAN_ARCH <= 5
        DUMP_SECTION(FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n");
#endif

        info.width = params.width;
        info.height = params.height;
        info.rt_count = params.render_target_count;
        DUMP_UNPACKED(FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");

#if PAN_ARCH <= 5
        /* On Midgard the tiler descriptor is embedded in the FBD itself */
        const void *t = pan_section_ptr(fb, FRAMEBUFFER, TILER);
        const void *w = pan_section_ptr(fb, FRAMEBUFFER, TILER_WEIGHTS);
        pandecode_midgard_tiler_descriptor(t, w);
#endif

        pandecode_indent--;
        pandecode_log("\n");

        /* The optional ZS/CRC extension descriptor, when present, directly
         * follows the framebuffer descriptor in memory */
        gpu_va += pan_size(FRAMEBUFFER);

        info.has_extra = params.has_zs_crc_extension;

        if (info.has_extra) {
                struct pandecode_mapped_memory *mem =
                        pandecode_find_mapped_gpu_mem_containing(gpu_va);
                const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR(zs_crc, mem, (mali_ptr)gpu_va);
                DUMP_CL(ZS_CRC_EXTENSION, zs_crc, "ZS CRC Extension:\n");
                pandecode_log("\n");

                gpu_va += pan_size(ZS_CRC_EXTENSION);
        }

        /* Render-target descriptors come last; only fragment jobs have them */
        if (is_fragment)
                pandecode_render_target(gpu_va, job_no, gpu_id, &params);

        return info;
}
#endif
367
368 static void
pandecode_attributes(const struct pandecode_mapped_memory * mem,mali_ptr addr,int job_no,char * suffix,int count,bool varying,enum mali_job_type job_type)369 pandecode_attributes(const struct pandecode_mapped_memory *mem,
370 mali_ptr addr, int job_no, char *suffix,
371 int count, bool varying, enum mali_job_type job_type)
372 {
373 char *prefix = varying ? "Varying" : "Attribute";
374 assert(addr);
375
376 if (!count) {
377 pandecode_msg("warn: No %s records\n", prefix);
378 return;
379 }
380
381 MAP_ADDR(ATTRIBUTE_BUFFER, addr, cl);
382
383 for (int i = 0; i < count; ++i) {
384 pan_unpack(cl + i * pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER, temp);
385 DUMP_UNPACKED(ATTRIBUTE_BUFFER, temp, "%s:\n", prefix);
386
387 switch (temp.type) {
388 case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR_WRITE_REDUCTION:
389 case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR: {
390 pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER),
391 ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2);
392 pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_NPOT,
393 temp2, (pandecode_indent + 1) * 2);
394 i++;
395 break;
396 }
397 case MALI_ATTRIBUTE_TYPE_3D_LINEAR:
398 case MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED: {
399 pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER_CONTINUATION_3D),
400 ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2);
401 pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_3D,
402 temp2, (pandecode_indent + 1) * 2);
403 i++;
404 break;
405 }
406 default:
407 break;
408 }
409 }
410 pandecode_log("\n");
411 }
412
#if PAN_ARCH >= 6
/* Decodes a Bifrost blend constant. See the notes in bifrost_blend_rt */

/* Dump blend descriptor `rt_no` and, when it selects shader-mode blending,
 * return the blend shader's address — the descriptor stores only the low
 * 32-bit PC, so the upper bits are taken from the fragment shader pointer.
 * Returns 0 for fixed-function blending. */
static mali_ptr
pandecode_bifrost_blend(void *descs, int job_no, int rt_no, mali_ptr frag_shader)
{
        pan_unpack(descs + (rt_no * pan_size(BLEND)), BLEND, b);
        DUMP_UNPACKED(BLEND, b, "Blend RT %d:\n", rt_no);
        if (b.internal.mode != MALI_BLEND_MODE_SHADER)
                return 0;

        return (frag_shader & 0xFFFFFFFF00000000ULL) | b.internal.shader.pc;
}
#elif PAN_ARCH == 5
/* Dump blend descriptor `rt_no`; return its blend shader address with the
 * low-nibble tag bits masked off, or 0 when no blend shader is used. */
static mali_ptr
pandecode_midgard_blend_mrt(void *descs, int job_no, int rt_no)
{
        pan_unpack(descs + (rt_no * pan_size(BLEND)), BLEND, b);
        DUMP_UNPACKED(BLEND, b, "Blend RT %d:\n", rt_no);
        return b.blend_shader ? (b.shader_pc & ~0xf) : 0;
}
#endif
435
436 static unsigned
pandecode_attribute_meta(int count,mali_ptr attribute,bool varying)437 pandecode_attribute_meta(int count, mali_ptr attribute, bool varying)
438 {
439 unsigned max = 0;
440
441 for (int i = 0; i < count; ++i, attribute += pan_size(ATTRIBUTE)) {
442 MAP_ADDR(ATTRIBUTE, attribute, cl);
443 pan_unpack(cl, ATTRIBUTE, a);
444 DUMP_UNPACKED(ATTRIBUTE, a, "%s:\n", varying ? "Varying" : "Attribute");
445 max = MAX2(max, a.buffer_index);
446 }
447
448 pandecode_log("\n");
449 return MIN2(max + 1, 256);
450 }
451
452 /* return bits [lo, hi) of word */
453 static u32
bits(u32 word,u32 lo,u32 hi)454 bits(u32 word, u32 lo, u32 hi)
455 {
456 if (hi - lo >= 32)
457 return word; // avoid undefined behavior with the shift
458
459 if (lo >= 32)
460 return 0;
461
462 return (word >> lo) & ((1 << (hi - lo)) - 1);
463 }
464
/* Decode and print an INVOCATION descriptor. The workgroup sizes and
 * counts are packed into a single bitfield delimited by the *_shift fields;
 * each extracted field is biased by +1 (a zero-width field encodes 1). */
static void
pandecode_invocation(const void *i)
{
        /* Decode invocation_count. See the comment before the definition of
         * invocation_count for an explanation.
         */
        pan_unpack(i, INVOCATION, invocation);

        /* Local workgroup size: bits [0, size_y_shift), etc. */
        unsigned size_x = bits(invocation.invocations, 0, invocation.size_y_shift) + 1;
        unsigned size_y = bits(invocation.invocations, invocation.size_y_shift, invocation.size_z_shift) + 1;
        unsigned size_z = bits(invocation.invocations, invocation.size_z_shift, invocation.workgroups_x_shift) + 1;

        /* Workgroup counts occupy the remaining bits up to bit 32 */
        unsigned groups_x = bits(invocation.invocations, invocation.workgroups_x_shift, invocation.workgroups_y_shift) + 1;
        unsigned groups_y = bits(invocation.invocations, invocation.workgroups_y_shift, invocation.workgroups_z_shift) + 1;
        unsigned groups_z = bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1;

        pandecode_log("Invocation (%d, %d, %d) x (%d, %d, %d)\n",
                      size_x, size_y, size_z,
                      groups_x, groups_y, groups_z);

        DUMP_UNPACKED(INVOCATION, invocation, "Invocation:\n")
}
487
/* Decode and print a PRIMITIVE descriptor, validating its index buffer
 * (if any) against the mapped GPU memory. */
static void
pandecode_primitive(const void *p)
{
        pan_unpack(p, PRIMITIVE, primitive);
        DUMP_UNPACKED(PRIMITIVE, primitive, "Primitive:\n");

        /* Validate an index buffer is present if we need one. TODO: verify
         * relationship between invocation_count and index_count */

        if (primitive.indices) {
                /* Grab the size. UINT32 is special-cased to 4 bytes; for the
                 * remaining types the raw enum value is used as the byte size.
                 * NOTE(review): this presumably relies on UINT8 == 1 and
                 * UINT16 == 2 in the genxml enum — confirm before reusing. */
                unsigned size = (primitive.index_type == MALI_INDEX_TYPE_UINT32) ?
                                sizeof(uint32_t) : primitive.index_type;

                /* Ensure we got a size, and if so, validate the index buffer
                 * is large enough to hold a full set of indices of the given
                 * size */

                if (!size)
                        pandecode_msg("XXX: index size missing\n");
                else
                        pandecode_validate_buffer(primitive.indices, primitive.index_count * size);
        } else if (primitive.index_type)
                pandecode_msg("XXX: unexpected index size\n");
}
513
514 static void
pandecode_uniform_buffers(mali_ptr pubufs,int ubufs_count,int job_no)515 pandecode_uniform_buffers(mali_ptr pubufs, int ubufs_count, int job_no)
516 {
517 struct pandecode_mapped_memory *umem = pandecode_find_mapped_gpu_mem_containing(pubufs);
518 uint64_t *PANDECODE_PTR_VAR(ubufs, umem, pubufs);
519
520 for (int i = 0; i < ubufs_count; i++) {
521 mali_ptr addr = (ubufs[i] >> 10) << 2;
522 unsigned size = addr ? (((ubufs[i] & ((1 << 10) - 1)) + 1) * 16) : 0;
523
524 pandecode_validate_buffer(addr, size);
525
526 char *ptr = pointer_as_memory_reference(addr);
527 pandecode_log("ubuf_%d[%u] = %s;\n", i, size, ptr);
528 free(ptr);
529 }
530
531 pandecode_log("\n");
532 }
533
534 static void
pandecode_uniforms(mali_ptr uniforms,unsigned uniform_count)535 pandecode_uniforms(mali_ptr uniforms, unsigned uniform_count)
536 {
537 pandecode_validate_buffer(uniforms, uniform_count * 16);
538
539 char *ptr = pointer_as_memory_reference(uniforms);
540 pandecode_log("vec4 uniforms[%u] = %s;\n", uniform_count, ptr);
541 free(ptr);
542 pandecode_log("\n");
543 }
544
545 static const char *
shader_type_for_job(unsigned type)546 shader_type_for_job(unsigned type)
547 {
548 switch (type) {
549 case MALI_JOB_TYPE_VERTEX: return "VERTEX";
550 case MALI_JOB_TYPE_TILER: return "FRAGMENT";
551 case MALI_JOB_TYPE_FRAGMENT: return "FRAGMENT";
552 case MALI_JOB_TYPE_COMPUTE: return "COMPUTE";
553 default: return "UNKNOWN";
554 }
555 }
556
/* Monotonic counter giving each disassembled shader a unique banner name */
static unsigned shader_id = 0;

/* Disassemble the shader at shader_ptr into the dump stream and return its
 * statistics. On Bifrost (>= v6) the disassembler reports no stats yet, so
 * resource counts are filled with the -128 sentinel and counts with zero.
 * NOTE(review): shader_no is unused — confirm before removing. */
static struct midgard_disasm_stats
pandecode_shader_disassemble(mali_ptr shader_ptr, int shader_no, int type,
                             unsigned gpu_id)
{
        struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(shader_ptr);
        uint8_t *PANDECODE_PTR_VAR(code, mem, shader_ptr);

        /* Compute maximum possible size */
        size_t sz = mem->length - (shader_ptr - mem->gpu_va);

        /* Print some boilerplate to clearly denote the assembly (which doesn't
         * obey indentation rules), and actually do the disassembly! */

        pandecode_log_cont("\n\n");

        struct midgard_disasm_stats stats;

#if PAN_ARCH >= 6
        disassemble_bifrost(pandecode_dump_stream, code, sz, true);

        /* TODO: Extend stats to Bifrost */
        stats.texture_count = -128;
        stats.sampler_count = -128;
        stats.attribute_count = -128;
        stats.varying_count = -128;
        stats.uniform_count = -128;
        stats.uniform_buffer_count = -128;
        stats.work_count = -128;

        stats.instruction_count = 0;
        stats.bundle_count = 0;
        stats.quadword_count = 0;
        stats.helper_invocations = false;
#else
        stats = disassemble_midgard(pandecode_dump_stream,
                                    code, sz, gpu_id, true);
#endif

        /* Thread count is derived from the work-register count: fewer
         * registers allow more threads (4 / 2 / 1) */
        unsigned nr_threads =
                (stats.work_count <= 4) ? 4 :
                (stats.work_count <= 8) ? 2 :
                1;

        pandecode_log_cont("shader%d - MESA_SHADER_%s shader: "
                           "%u inst, %u bundles, %u quadwords, "
                           "%u registers, %u threads, 0 loops, 0:0 spills:fills\n\n\n",
                           shader_id++,
                           shader_type_for_job(type),
                           stats.instruction_count, stats.bundle_count, stats.quadword_count,
                           stats.work_count, nr_threads);

        return stats;
}
612
/* Dump the pointer payload that follows (or is referenced by) a texture
 * descriptor: an array of surface pointers, optionally interleaved with
 * line/surface stride pairs when manual_stride is set. */
static void
pandecode_texture_payload(mali_ptr payload,
                          enum mali_texture_dimension dim,
                          enum mali_texture_layout layout,
                          bool manual_stride,
                          uint8_t levels,
                          uint16_t nr_samples,
                          uint16_t array_size,
                          struct pandecode_mapped_memory *tmem)
{
        /* NOTE(review): the `layout` parameter is unused here — confirm
         * before removing. */
        pandecode_log(".payload = {\n");
        pandecode_indent++;

        /* A bunch of bitmap pointers follow.
         * We work out the correct number,
         * based on the mipmap/cubemap
         * properties, but dump extra
         * possibilities to futureproof */

        int bitmap_count = levels;

        /* Miptree for each face */
        if (dim == MALI_TEXTURE_DIMENSION_CUBE)
                bitmap_count *= 6;

        /* Array of layers */
        bitmap_count *= nr_samples;

        /* Array of textures */
        bitmap_count *= array_size;

        /* Stride for each element */
        if (manual_stride)
                bitmap_count *= 2;

        mali_ptr *pointers_and_strides = pandecode_fetch_gpu_mem(tmem,
                        payload, sizeof(mali_ptr) * bitmap_count);
        for (int i = 0; i < bitmap_count; ++i) {
                /* How we dump depends if this is a stride or a pointer */

                if (manual_stride && (i & 1)) {
                        /* signed 32-bit snuck in as a 64-bit pointer */
                        uint64_t stride_set = pointers_and_strides[i];
                        int32_t line_stride = stride_set;
                        int32_t surface_stride = stride_set >> 32;
                        pandecode_log("(mali_ptr) %d /* surface stride */ %d /* line stride */, \n",
                                      surface_stride, line_stride);
                } else {
                        char *a = pointer_as_memory_reference(pointers_and_strides[i]);
                        pandecode_log("%s, \n", a);
                        free(a);
                }
        }

        pandecode_indent--;
        pandecode_log("},\n");
}
670
#if PAN_ARCH <= 5
/* Midgard: decode a texture descriptor at GPU address u. The pointer
 * payload immediately follows the descriptor in memory. */
static void
pandecode_texture(mali_ptr u,
                  struct pandecode_mapped_memory *tmem,
                  unsigned job_no, unsigned tex)
{
        /* NOTE(review): tmem, job_no and tex are unused — the mapping is
         * re-resolved locally. Confirm before removing. */
        struct pandecode_mapped_memory *mapped_mem = pandecode_find_mapped_gpu_mem_containing(u);
        const uint8_t *cl = pandecode_fetch_gpu_mem(mapped_mem, u, pan_size(TEXTURE));

        pan_unpack(cl, TEXTURE, temp);
        DUMP_UNPACKED(TEXTURE, temp, "Texture:\n")

        pandecode_indent++;
        /* 3D textures carry no per-sample surfaces */
        unsigned nr_samples = temp.dimension == MALI_TEXTURE_DIMENSION_3D ?
                              1 : temp.sample_count;
        pandecode_texture_payload(u + pan_size(TEXTURE),
                                  temp.dimension, temp.texel_ordering, temp.manual_stride,
                                  temp.levels, nr_samples, temp.array_size, mapped_mem);
        pandecode_indent--;
}
#else
/* Bifrost: decode a texture descriptor already mapped at CPU pointer cl;
 * the pointer payload lives at the separate `surfaces` GPU address and
 * always uses manual strides. */
static void
pandecode_bifrost_texture(
                const void *cl,
                unsigned job_no,
                unsigned tex)
{
        pan_unpack(cl, TEXTURE, temp);
        DUMP_UNPACKED(TEXTURE, temp, "Texture:\n")

        struct pandecode_mapped_memory *tmem = pandecode_find_mapped_gpu_mem_containing(temp.surfaces);
        /* 3D textures carry no per-sample surfaces */
        unsigned nr_samples = temp.dimension == MALI_TEXTURE_DIMENSION_3D ?
                              1 : temp.sample_count;
        pandecode_indent++;
        pandecode_texture_payload(temp.surfaces, temp.dimension, temp.texel_ordering,
                                  true, temp.levels, nr_samples, temp.array_size, tmem);
        pandecode_indent--;
}
#endif
710
711 static void
pandecode_blend_shader_disassemble(mali_ptr shader,int job_no,int job_type,unsigned gpu_id)712 pandecode_blend_shader_disassemble(mali_ptr shader, int job_no, int job_type,
713 unsigned gpu_id)
714 {
715 struct midgard_disasm_stats stats =
716 pandecode_shader_disassemble(shader, job_no, job_type, gpu_id);
717
718 bool has_texture = (stats.texture_count > 0);
719 bool has_sampler = (stats.sampler_count > 0);
720 bool has_attribute = (stats.attribute_count > 0);
721 bool has_varying = (stats.varying_count > 0);
722 bool has_uniform = (stats.uniform_count > 0);
723 bool has_ubo = (stats.uniform_buffer_count > 0);
724
725 if (has_texture || has_sampler)
726 pandecode_msg("XXX: blend shader accessing textures\n");
727
728 if (has_attribute || has_varying)
729 pandecode_msg("XXX: blend shader accessing interstage\n");
730
731 if (has_uniform || has_ubo)
732 pandecode_msg("XXX: blend shader accessing uniforms\n");
733 }
734
735 static void
pandecode_textures(mali_ptr textures,unsigned texture_count,int job_no)736 pandecode_textures(mali_ptr textures, unsigned texture_count, int job_no)
737 {
738 struct pandecode_mapped_memory *mmem = pandecode_find_mapped_gpu_mem_containing(textures);
739
740 if (!mmem)
741 return;
742
743 pandecode_log("Textures %"PRIx64"_%d:\n", textures, job_no);
744 pandecode_indent++;
745
746 #if PAN_ARCH >= 6
747 const void *cl =
748 pandecode_fetch_gpu_mem(mmem,
749 textures,
750 pan_size(TEXTURE) *
751 texture_count);
752
753 for (unsigned tex = 0; tex < texture_count; ++tex) {
754 pandecode_bifrost_texture(cl + pan_size(TEXTURE) * tex,
755 job_no, tex);
756 }
757 #else
758 mali_ptr *PANDECODE_PTR_VAR(u, mmem, textures);
759
760 for (int tex = 0; tex < texture_count; ++tex) {
761 mali_ptr *PANDECODE_PTR_VAR(u, mmem, textures + tex * sizeof(mali_ptr));
762 char *a = pointer_as_memory_reference(*u);
763 pandecode_log("%s,\n", a);
764 free(a);
765 }
766
767 /* Now, finally, descend down into the texture descriptor */
768 for (unsigned tex = 0; tex < texture_count; ++tex) {
769 mali_ptr *PANDECODE_PTR_VAR(u, mmem, textures + tex * sizeof(mali_ptr));
770 struct pandecode_mapped_memory *tmem = pandecode_find_mapped_gpu_mem_containing(*u);
771 if (tmem)
772 pandecode_texture(*u, tmem, job_no, tex);
773 }
774 #endif
775 pandecode_indent--;
776 pandecode_log("\n");
777 }
778
779 static void
pandecode_samplers(mali_ptr samplers,unsigned sampler_count,int job_no)780 pandecode_samplers(mali_ptr samplers, unsigned sampler_count, int job_no)
781 {
782 pandecode_log("Samplers %"PRIx64"_%d:\n", samplers, job_no);
783 pandecode_indent++;
784
785 for (int i = 0; i < sampler_count; ++i)
786 DUMP_ADDR(SAMPLER, samplers + (pan_size(SAMPLER) * i), "Sampler %d:\n", i);
787
788 pandecode_indent--;
789 pandecode_log("\n");
790 }
791
/* Decode a Draw Call Descriptor (DCD): the per-arch framebuffer / local
 * storage, the renderer state (including shader disassembly and per-RT
 * blend descriptors), viewport, attributes, varyings, uniforms/UBOs,
 * textures and samplers. Resource counts come from the renderer state and
 * gate the corresponding dumps below. */
static void
pandecode_dcd(const struct MALI_DRAW *p,
              int job_no, enum mali_job_type job_type,
              char *suffix, unsigned gpu_id)
{
        struct pandecode_mapped_memory *attr_mem;

#if PAN_ARCH >= 5
        struct pandecode_fbd fbd_info = {
                /* Default for Bifrost */
                .rt_count = 1
        };
#endif

        /* Framebuffer / local-storage handling differs per architecture:
         * v6+ always has a thread-storage descriptor; v5 tiler jobs embed
         * an MFBD (tag bits masked off the pointer); v4 uses the SFBD. */
#if PAN_ARCH >= 6
        pandecode_local_storage(p->thread_storage & ~1, job_no);
#elif PAN_ARCH == 5
        if (job_type != MALI_JOB_TYPE_TILER) {
                pandecode_local_storage(p->thread_storage & ~1, job_no);
        } else {
                assert(p->fbd & MALI_FBD_TAG_IS_MFBD);
                fbd_info = pandecode_mfbd_bfr((u64) ((uintptr_t) p->fbd) & ~MALI_FBD_TAG_MASK,
                                              job_no, false, gpu_id);
        }
#else
        pandecode_sfbd((u64) (uintptr_t) p->fbd, job_no, false, gpu_id);
#endif

        int varying_count = 0, attribute_count = 0, uniform_count = 0, uniform_buffer_count = 0;
        int texture_count = 0, sampler_count = 0;

        if (p->state) {
                struct pandecode_mapped_memory *smem = pandecode_find_mapped_gpu_mem_containing(p->state);
                uint32_t *cl = pandecode_fetch_gpu_mem(smem, p->state, pan_size(RENDERER_STATE));

                pan_unpack(cl, RENDERER_STATE, state);

                /* Disassemble the main shader (low nibble of the pointer is
                 * a tag, masked off) */
                if (state.shader.shader & ~0xF)
                        pandecode_shader_disassemble(state.shader.shader & ~0xF, job_no, job_type, gpu_id);

#if PAN_ARCH >= 6
                /* IDVS jobs carry a second (varying) shader */
                bool idvs = (job_type == MALI_JOB_TYPE_INDEXED_VERTEX);

                if (idvs && state.secondary_shader)
                        pandecode_shader_disassemble(state.secondary_shader, job_no, job_type, gpu_id);
#endif
                DUMP_UNPACKED(RENDERER_STATE, state, "State:\n");
                pandecode_indent++;

                /* Save for dumps */
                attribute_count = state.shader.attribute_count;
                varying_count = state.shader.varying_count;
                texture_count = state.shader.texture_count;
                sampler_count = state.shader.sampler_count;
                uniform_buffer_count = state.properties.uniform_buffer_count;

#if PAN_ARCH >= 6
                uniform_count = state.preload.uniform_count;
#else
                uniform_count = state.properties.uniform_count;
#endif

#if PAN_ARCH >= 6
                DUMP_UNPACKED(PRELOAD, state.preload, "Preload:\n");
#elif PAN_ARCH == 4
                /* v4 has a single blend shader in the renderer state */
                mali_ptr shader = state.blend_shader & ~0xF;
                if (state.multisample_misc.blend_shader && shader)
                        pandecode_blend_shader_disassemble(shader, job_no, job_type, gpu_id);
#endif
                pandecode_indent--;
                pandecode_log("\n");

                /* MRT blend fields are used whenever MFBD is used, with
                 * per-RT descriptors */

#if PAN_ARCH >= 5
                if ((job_type == MALI_JOB_TYPE_TILER || job_type == MALI_JOB_TYPE_FRAGMENT) &&
                    (PAN_ARCH >= 6 || p->thread_storage & MALI_FBD_TAG_IS_MFBD)) {
                        /* Blend descriptors directly follow the renderer state */
                        void* blend_base = ((void *) cl) + pan_size(RENDERER_STATE);

                        for (unsigned i = 0; i < fbd_info.rt_count; i++) {
                                mali_ptr shader = 0;

#if PAN_ARCH >= 6
                                shader = pandecode_bifrost_blend(blend_base, job_no, i,
                                                                 state.shader.shader);
#else
                                shader = pandecode_midgard_blend_mrt(blend_base, job_no, i);
#endif
                                if (shader & ~0xF)
                                        pandecode_blend_shader_disassemble(shader, job_no, job_type,
                                                                           gpu_id);
                        }
                }
#endif
        } else
                pandecode_msg("XXX: missing shader descriptor\n");

        if (p->viewport) {
                DUMP_ADDR(VIEWPORT, p->viewport, "Viewport:\n");
                pandecode_log("\n");
        }

        unsigned max_attr_index = 0;

        if (p->attributes)
                max_attr_index = pandecode_attribute_meta(attribute_count, p->attributes, false);

        if (p->attribute_buffers) {
                attr_mem = pandecode_find_mapped_gpu_mem_containing(p->attribute_buffers);
                pandecode_attributes(attr_mem, p->attribute_buffers, job_no, suffix, max_attr_index, false, job_type);
        }

        if (p->varyings) {
                varying_count = pandecode_attribute_meta(varying_count, p->varyings, true);
        }

        if (p->varying_buffers) {
                attr_mem = pandecode_find_mapped_gpu_mem_containing(p->varying_buffers);
                pandecode_attributes(attr_mem, p->varying_buffers, job_no, suffix, varying_count, true, job_type);
        }

        if (p->uniform_buffers) {
                if (uniform_buffer_count)
                        pandecode_uniform_buffers(p->uniform_buffers, uniform_buffer_count, job_no);
                else
                        pandecode_msg("warn: UBOs specified but not referenced\n");
        } else if (uniform_buffer_count)
                pandecode_msg("XXX: UBOs referenced but not specified\n");

        /* We don't want to actually dump uniforms, but we do need to validate
         * that the counts we were given are sane */

        if (p->push_uniforms) {
                if (uniform_count)
                        pandecode_uniforms(p->push_uniforms, uniform_count);
                else
                        pandecode_msg("warn: Uniforms specified but not referenced\n");
        } else if (uniform_count)
                pandecode_msg("XXX: Uniforms referenced but not specified\n");

        if (p->textures)
                pandecode_textures(p->textures, texture_count, job_no);

        if (p->samplers)
                pandecode_samplers(p->samplers, sampler_count, job_no);
}
939
940 static void
pandecode_primitive_size(const void * s,bool constant)941 pandecode_primitive_size(const void *s, bool constant)
942 {
943 pan_unpack(s, PRIMITIVE_SIZE, ps);
944 if (ps.size_array == 0x0)
945 return;
946
947 DUMP_UNPACKED(PRIMITIVE_SIZE, ps, "Primitive Size:\n")
948 }
949
950 static void
pandecode_vertex_compute_geometry_job(const struct MALI_JOB_HEADER * h,const struct pandecode_mapped_memory * mem,mali_ptr job,int job_no,unsigned gpu_id)951 pandecode_vertex_compute_geometry_job(const struct MALI_JOB_HEADER *h,
952 const struct pandecode_mapped_memory *mem,
953 mali_ptr job, int job_no, unsigned gpu_id)
954 {
955 struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, mem, job);
956 pan_section_unpack(p, COMPUTE_JOB, DRAW, draw);
957 pandecode_dcd(&draw, job_no, h->type, "", gpu_id);
958
959 pandecode_log("Vertex Job Payload:\n");
960 pandecode_indent++;
961 pandecode_invocation(pan_section_ptr(p, COMPUTE_JOB, INVOCATION));
962 DUMP_SECTION(COMPUTE_JOB, PARAMETERS, p, "Vertex Job Parameters:\n");
963 DUMP_UNPACKED(DRAW, draw, "Draw:\n");
964 pandecode_indent--;
965 pandecode_log("\n");
966 }
967
968 #if PAN_ARCH >= 6
969 static void
pandecode_bifrost_tiler_heap(mali_ptr gpu_va,int job_no)970 pandecode_bifrost_tiler_heap(mali_ptr gpu_va, int job_no)
971 {
972 struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
973 pan_unpack(PANDECODE_PTR(mem, gpu_va, void), TILER_HEAP, h);
974 DUMP_UNPACKED(TILER_HEAP, h, "Bifrost Tiler Heap:\n");
975 }
976
977 static void
pandecode_bifrost_tiler(mali_ptr gpu_va,int job_no)978 pandecode_bifrost_tiler(mali_ptr gpu_va, int job_no)
979 {
980 struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
981 pan_unpack(PANDECODE_PTR(mem, gpu_va, void), TILER_CONTEXT, t);
982
983 pandecode_bifrost_tiler_heap(t.heap, job_no);
984
985 DUMP_UNPACKED(TILER_CONTEXT, t, "Bifrost Tiler:\n");
986 pandecode_indent++;
987 if (t.hierarchy_mask != 0xa &&
988 t.hierarchy_mask != 0x14 &&
989 t.hierarchy_mask != 0x28 &&
990 t.hierarchy_mask != 0x50 &&
991 t.hierarchy_mask != 0xa0)
992 pandecode_msg("XXX: Unexpected hierarchy_mask (not 0xa, 0x14, 0x28, 0x50 or 0xa0)!");
993
994 pandecode_indent--;
995 }
996
997 static void
pandecode_indexed_vertex_job(const struct MALI_JOB_HEADER * h,const struct pandecode_mapped_memory * mem,mali_ptr job,int job_no,unsigned gpu_id)998 pandecode_indexed_vertex_job(const struct MALI_JOB_HEADER *h,
999 const struct pandecode_mapped_memory *mem,
1000 mali_ptr job, int job_no, unsigned gpu_id)
1001 {
1002 struct mali_indexed_vertex_job_packed *PANDECODE_PTR_VAR(p, mem, job);
1003
1004 pandecode_log("Vertex:\n");
1005 pan_section_unpack(p, INDEXED_VERTEX_JOB, VERTEX_DRAW, vert_draw);
1006 pandecode_dcd(&vert_draw, job_no, h->type, "", gpu_id);
1007 DUMP_UNPACKED(DRAW, vert_draw, "Vertex Draw:\n");
1008
1009 pandecode_log("Fragment:\n");
1010 pan_section_unpack(p, INDEXED_VERTEX_JOB, FRAGMENT_DRAW, frag_draw);
1011 pandecode_dcd(&frag_draw, job_no, MALI_JOB_TYPE_FRAGMENT, "", gpu_id);
1012 DUMP_UNPACKED(DRAW, frag_draw, "Fragment Draw:\n");
1013
1014 pan_section_unpack(p, INDEXED_VERTEX_JOB, TILER, tiler_ptr);
1015 pandecode_log("Tiler Job Payload:\n");
1016 pandecode_indent++;
1017 pandecode_bifrost_tiler(tiler_ptr.address, job_no);
1018 pandecode_indent--;
1019
1020 pandecode_invocation(pan_section_ptr(p, INDEXED_VERTEX_JOB, INVOCATION));
1021 pandecode_primitive(pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE));
1022
1023 /* TODO: gl_PointSize on Bifrost */
1024 pandecode_primitive_size(pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE_SIZE), true);
1025
1026 pan_section_unpack(p, INDEXED_VERTEX_JOB, PADDING, padding);
1027 }
1028
1029 static void
pandecode_tiler_job_bfr(const struct MALI_JOB_HEADER * h,const struct pandecode_mapped_memory * mem,mali_ptr job,int job_no,unsigned gpu_id)1030 pandecode_tiler_job_bfr(const struct MALI_JOB_HEADER *h,
1031 const struct pandecode_mapped_memory *mem,
1032 mali_ptr job, int job_no, unsigned gpu_id)
1033 {
1034 struct mali_tiler_job_packed *PANDECODE_PTR_VAR(p, mem, job);
1035 pan_section_unpack(p, TILER_JOB, DRAW, draw);
1036 pan_section_unpack(p, TILER_JOB, TILER, tiler_ptr);
1037 pandecode_dcd(&draw, job_no, h->type, "", gpu_id);
1038
1039 pandecode_log("Tiler Job Payload:\n");
1040 pandecode_indent++;
1041 pandecode_bifrost_tiler(tiler_ptr.address, job_no);
1042
1043 pandecode_invocation(pan_section_ptr(p, TILER_JOB, INVOCATION));
1044 pandecode_primitive(pan_section_ptr(p, TILER_JOB, PRIMITIVE));
1045
1046 /* TODO: gl_PointSize on Bifrost */
1047 pandecode_primitive_size(pan_section_ptr(p, TILER_JOB, PRIMITIVE_SIZE), true);
1048 pan_section_unpack(p, TILER_JOB, PADDING, padding);
1049 DUMP_UNPACKED(DRAW, draw, "Draw:\n");
1050 pandecode_indent--;
1051 pandecode_log("\n");
1052 }
1053 #else
1054 static void
pandecode_tiler_job_mdg(const struct MALI_JOB_HEADER * h,const struct pandecode_mapped_memory * mem,mali_ptr job,int job_no,unsigned gpu_id)1055 pandecode_tiler_job_mdg(const struct MALI_JOB_HEADER *h,
1056 const struct pandecode_mapped_memory *mem,
1057 mali_ptr job, int job_no, unsigned gpu_id)
1058 {
1059 struct mali_tiler_job_packed *PANDECODE_PTR_VAR(p, mem, job);
1060 pan_section_unpack(p, TILER_JOB, DRAW, draw);
1061 pandecode_dcd(&draw, job_no, h->type, "", gpu_id);
1062
1063 pandecode_log("Tiler Job Payload:\n");
1064 pandecode_indent++;
1065 pandecode_invocation(pan_section_ptr(p, TILER_JOB, INVOCATION));
1066 pandecode_primitive(pan_section_ptr(p, TILER_JOB, PRIMITIVE));
1067 DUMP_UNPACKED(DRAW, draw, "Draw:\n");
1068
1069 pan_section_unpack(p, TILER_JOB, PRIMITIVE, primitive);
1070 pandecode_primitive_size(pan_section_ptr(p, TILER_JOB, PRIMITIVE_SIZE),
1071 primitive.point_size_array_format == MALI_POINT_SIZE_ARRAY_FORMAT_NONE);
1072 pandecode_indent--;
1073 pandecode_log("\n");
1074 }
1075 #endif
1076
1077 static void
pandecode_fragment_job(const struct pandecode_mapped_memory * mem,mali_ptr job,int job_no,unsigned gpu_id)1078 pandecode_fragment_job(const struct pandecode_mapped_memory *mem,
1079 mali_ptr job, int job_no, unsigned gpu_id)
1080 {
1081 struct mali_fragment_job_packed *PANDECODE_PTR_VAR(p, mem, job);
1082 pan_section_unpack(p, FRAGMENT_JOB, PAYLOAD, s);
1083
1084
1085 #if PAN_ARCH == 4
1086 pandecode_sfbd(s.framebuffer, job_no, true, gpu_id);
1087 #else
1088 assert(s.framebuffer & MALI_FBD_TAG_IS_MFBD);
1089
1090 struct pandecode_fbd info;
1091
1092 info = pandecode_mfbd_bfr(s.framebuffer & ~MALI_FBD_TAG_MASK, job_no,
1093 true, gpu_id);
1094 #endif
1095
1096 #if PAN_ARCH >= 5
1097 unsigned expected_tag = 0;
1098
1099 /* Compute the tag for the tagged pointer. This contains the type of
1100 * FBD (MFBD/SFBD), and in the case of an MFBD, information about which
1101 * additional structures follow the MFBD header (an extra payload or
1102 * not, as well as a count of render targets) */
1103
1104 expected_tag = MALI_FBD_TAG_IS_MFBD;
1105 if (info.has_extra)
1106 expected_tag |= MALI_FBD_TAG_HAS_ZS_RT;
1107
1108 expected_tag |= MALI_FBD_TAG_IS_MFBD | (MALI_POSITIVE(info.rt_count) << 2);
1109 #endif
1110
1111 DUMP_UNPACKED(FRAGMENT_JOB_PAYLOAD, s, "Fragment Job Payload:\n");
1112
1113 #if PAN_ARCH >= 5
1114 /* The FBD is a tagged pointer */
1115
1116 unsigned tag = (s.framebuffer & MALI_FBD_TAG_MASK);
1117
1118 if (tag != expected_tag)
1119 pandecode_msg("XXX: expected FBD tag %X but got %X\n", expected_tag, tag);
1120 #endif
1121
1122 pandecode_log("\n");
1123 }
1124
1125 static void
pandecode_write_value_job(const struct pandecode_mapped_memory * mem,mali_ptr job,int job_no)1126 pandecode_write_value_job(const struct pandecode_mapped_memory *mem,
1127 mali_ptr job, int job_no)
1128 {
1129 struct mali_write_value_job_packed *PANDECODE_PTR_VAR(p, mem, job);
1130 pan_section_unpack(p, WRITE_VALUE_JOB, PAYLOAD, u);
1131 DUMP_SECTION(WRITE_VALUE_JOB, PAYLOAD, p, "Write Value Payload:\n");
1132 pandecode_log("\n");
1133 }
1134
1135 static void
pandecode_cache_flush_job(const struct pandecode_mapped_memory * mem,mali_ptr job,int job_no)1136 pandecode_cache_flush_job(const struct pandecode_mapped_memory *mem,
1137 mali_ptr job, int job_no)
1138 {
1139 struct mali_cache_flush_job_packed *PANDECODE_PTR_VAR(p, mem, job);
1140 pan_section_unpack(p, CACHE_FLUSH_JOB, PAYLOAD, u);
1141 DUMP_SECTION(CACHE_FLUSH_JOB, PAYLOAD, p, "Cache Flush Payload:\n");
1142 pandecode_log("\n");
1143 }
1144
/* Entrypoint to start tracing. jc_gpu_va is the GPU address for the first job
 * in the chain; later jobs are found by walking the chain. The GPU ID is the
 * fine-grained model ID, since some details are model-specific even within a
 * particular architecture (the architecture itself is selected at compile
 * time via PAN_ARCH). */
1150
1151 void
GENX(pandecode_jc)1152 GENX(pandecode_jc)(mali_ptr jc_gpu_va, unsigned gpu_id)
1153 {
1154 pandecode_dump_file_open();
1155
1156 unsigned job_descriptor_number = 0;
1157 mali_ptr next_job = 0;
1158
1159 do {
1160 struct pandecode_mapped_memory *mem =
1161 pandecode_find_mapped_gpu_mem_containing(jc_gpu_va);
1162
1163 pan_unpack(PANDECODE_PTR(mem, jc_gpu_va, struct mali_job_header_packed),
1164 JOB_HEADER, h);
1165 next_job = h.next;
1166
1167 int job_no = job_descriptor_number++;
1168
1169 DUMP_UNPACKED(JOB_HEADER, h, "Job Header:\n");
1170 pandecode_log("\n");
1171
1172 switch (h.type) {
1173 case MALI_JOB_TYPE_WRITE_VALUE:
1174 pandecode_write_value_job(mem, jc_gpu_va, job_no);
1175 break;
1176
1177 case MALI_JOB_TYPE_CACHE_FLUSH:
1178 pandecode_cache_flush_job(mem, jc_gpu_va, job_no);
1179 break;
1180
1181 case MALI_JOB_TYPE_TILER:
1182 #if PAN_ARCH >= 6
1183 pandecode_tiler_job_bfr(&h, mem, jc_gpu_va, job_no, gpu_id);
1184 #else
1185 pandecode_tiler_job_mdg(&h, mem, jc_gpu_va, job_no, gpu_id);
1186 #endif
1187 break;
1188
1189 case MALI_JOB_TYPE_VERTEX:
1190 case MALI_JOB_TYPE_COMPUTE:
1191 pandecode_vertex_compute_geometry_job(&h, mem, jc_gpu_va, job_no, gpu_id);
1192 break;
1193
1194 #if PAN_ARCH >= 6
1195 case MALI_JOB_TYPE_INDEXED_VERTEX:
1196 pandecode_indexed_vertex_job(&h, mem, jc_gpu_va, job_no, gpu_id);
1197 break;
1198 #endif
1199
1200 case MALI_JOB_TYPE_FRAGMENT:
1201 pandecode_fragment_job(mem, jc_gpu_va, job_no, gpu_id);
1202 break;
1203
1204 default:
1205 break;
1206 }
1207 } while ((jc_gpu_va = next_job));
1208
1209 fflush(pandecode_dump_stream);
1210 pandecode_map_read_write();
1211 }
1212
1213 void
GENX(pandecode_abort_on_fault)1214 GENX(pandecode_abort_on_fault)(mali_ptr jc_gpu_va)
1215 {
1216 mali_ptr next_job = 0;
1217
1218 do {
1219 struct pandecode_mapped_memory *mem =
1220 pandecode_find_mapped_gpu_mem_containing(jc_gpu_va);
1221
1222 pan_unpack(PANDECODE_PTR(mem, jc_gpu_va, struct mali_job_header_packed),
1223 JOB_HEADER, h);
1224 next_job = h.next;
1225
1226 /* Ensure the job is marked COMPLETE */
1227 if (h.exception_status != 0x1) {
1228 fprintf(stderr, "Incomplete job or timeout");
1229 abort();
1230 }
1231 } while ((jc_gpu_va = next_job));
1232
1233 pandecode_map_read_write();
1234 }
1235