1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #ifndef _WIN32
31 #include <sys/utsname.h>
32 #endif
33 #include <sys/stat.h>
34 
35 #include "util/mesa-sha1.h"
36 #include "ac_debug.h"
37 #include "radv_debug.h"
38 #include "radv_shader.h"
39 #include "sid.h"
40 
41 #define TRACE_BO_SIZE 4096
42 #define TMA_BO_SIZE   4096
43 
44 #define COLOR_RESET  "\033[0m"
45 #define COLOR_RED    "\033[31m"
46 #define COLOR_GREEN  "\033[1;32m"
47 #define COLOR_YELLOW "\033[1;33m"
48 #define COLOR_CYAN   "\033[1;36m"
49 
50 #define RADV_DUMP_DIR "radv_dumps"
51 
52 /* Trace BO layout (offsets are 4 bytes):
53  *
54  * [0]: primary trace ID
55  * [1]: secondary trace ID
56  * [2-3]: 64-bit GFX ring pipeline pointer
57  * [4-5]: 64-bit COMPUTE ring pipeline pointer
58  * [6-7]: Vertex descriptors pointer
59  * [8-9]: 64-bit descriptor set #0 pointer
60  * ...
61  * [68-69]: 64-bit descriptor set #31 pointer
62  */
63 
64 bool
radv_init_trace(struct radv_device * device)65 radv_init_trace(struct radv_device *device)
66 {
67    struct radeon_winsys *ws = device->ws;
68    VkResult result;
69 
70    result = ws->buffer_create(
71       ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
72       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
73       RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
74    if (result != VK_SUCCESS)
75       return false;
76 
77    result = ws->buffer_make_resident(ws, device->trace_bo, true);
78    if (result != VK_SUCCESS)
79       return false;
80 
81    device->trace_id_ptr = ws->buffer_map(device->trace_bo);
82    if (!device->trace_id_ptr)
83       return false;
84 
85    ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp,
86                        NULL);
87 
88    return true;
89 }
90 
91 void
radv_finish_trace(struct radv_device * device)92 radv_finish_trace(struct radv_device *device)
93 {
94    struct radeon_winsys *ws = device->ws;
95 
96    if (unlikely(device->trace_bo)) {
97       ws->buffer_make_resident(ws, device->trace_bo, false);
98       ws->buffer_destroy(ws, device->trace_bo);
99    }
100 }
101 
102 static void
radv_dump_trace(struct radv_device * device,struct radeon_cmdbuf * cs,FILE * f)103 radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
104 {
105    fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
106    device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2);
107 }
108 
109 static void
radv_dump_mmapped_reg(struct radv_device * device,FILE * f,unsigned offset)110 radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
111 {
112    struct radeon_winsys *ws = device->ws;
113    uint32_t value;
114 
115    if (ws->read_registers(ws, offset, 1, &value))
116       ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0);
117 }
118 
119 static void
radv_dump_debug_registers(struct radv_device * device,FILE * f)120 radv_dump_debug_registers(struct radv_device *device, FILE *f)
121 {
122    struct radeon_info *info = &device->physical_device->rad_info;
123 
124    fprintf(f, "Memory-mapped registers:\n");
125    radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
126 
127    radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
128    radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
129    radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
130    radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
131    radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
132    radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
133    radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
134    if (info->chip_class <= GFX8) {
135       radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
136       radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
137       radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
138    }
139    radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
140    radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
141    radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
142    radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
143    radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
144    radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
145    radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
146    radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
147    radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
148    radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
149    fprintf(f, "\n");
150 }
151 
152 static void
radv_dump_buffer_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)153 radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
154 {
155    fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
156    for (unsigned j = 0; j < 4; j++)
157       ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
158 }
159 
160 static void
radv_dump_image_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)161 radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
162 {
163    unsigned sq_img_rsrc_word0 =
164       chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
165 
166    fprintf(f, COLOR_CYAN "    Image:" COLOR_RESET "\n");
167    for (unsigned j = 0; j < 8; j++)
168       ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
169 
170    fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
171    for (unsigned j = 0; j < 8; j++)
172       ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
173 }
174 
175 static void
radv_dump_sampler_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)176 radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
177 {
178    fprintf(f, COLOR_CYAN "    Sampler state:" COLOR_RESET "\n");
179    for (unsigned j = 0; j < 4; j++) {
180       ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
181    }
182 }
183 
184 static void
radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)185 radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
186                                             FILE *f)
187 {
188    radv_dump_image_descriptor(chip_class, desc, f);
189    radv_dump_sampler_descriptor(chip_class, desc + 16, f);
190 }
191 
192 static void
radv_dump_descriptor_set(struct radv_device * device,struct radv_descriptor_set * set,unsigned id,FILE * f)193 radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,
194                          FILE *f)
195 {
196    enum chip_class chip_class = device->physical_device->rad_info.chip_class;
197    const struct radv_descriptor_set_layout *layout;
198    int i;
199 
200    if (!set)
201       return;
202    layout = set->header.layout;
203 
204    for (i = 0; i < set->header.layout->binding_count; i++) {
205       uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
206 
207       switch (layout->binding[i].type) {
208       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
209       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
210       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
211       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
212          radv_dump_buffer_descriptor(chip_class, desc, f);
213          break;
214       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
215       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
216       case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
217          radv_dump_image_descriptor(chip_class, desc, f);
218          break;
219       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
220          radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
221          break;
222       case VK_DESCRIPTOR_TYPE_SAMPLER:
223          radv_dump_sampler_descriptor(chip_class, desc, f);
224          break;
225       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
226       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
227       case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
228       case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
229          /* todo */
230          break;
231       default:
232          assert(!"unknown descriptor type");
233          break;
234       }
235       fprintf(f, "\n");
236    }
237    fprintf(f, "\n\n");
238 }
239 
240 static void
radv_dump_descriptors(struct radv_device * device,FILE * f)241 radv_dump_descriptors(struct radv_device *device, FILE *f)
242 {
243    uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
244    int i;
245 
246    fprintf(f, "Descriptors:\n");
247    for (i = 0; i < MAX_SETS; i++) {
248       struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 4);
249 
250       radv_dump_descriptor_set(device, set, i, f);
251    }
252 }
253 
struct radv_shader_inst {
   char text[160];  /* one disasm line */
   unsigned offset; /* instruction offset */
   unsigned size;   /* instruction size = 4 or 8 */
};

/**
 * Split a disassembly string into lines and append the instruction lines to
 * the array pointed to by "instructions".
 *
 * Only newline-terminated lines containing a ';' are treated as
 * instructions; everything else is skipped. Each stored line gets a
 * " [PC=..., off=..., size=...]" suffix. Lines that do not fit into
 * radv_shader_inst::text are truncated (always NUL-terminated) instead of
 * overflowing the buffer.
 *
 * \param disasm        NUL-terminated disassembly text.
 * \param start_addr    address of the first instruction, used for the PC.
 * \param num           in/out: number of valid entries in "instructions".
 * \param instructions  output array; the caller must provide enough room
 *                      for every instruction line of "disasm".
 */
static void
si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,
                    struct radv_shader_inst *instructions)
{
   struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
   const char *next;

   while ((next = strchr(disasm, '\n'))) {
      const size_t len = (size_t)(next - disasm);
      const char *semicolon = memchr(disasm, ';', len);

      if (!semicolon) {
         /* Ignore everything that is not an instruction. */
         disasm = next + 1;
         continue;
      }

      struct radv_shader_inst *inst = &instructions[*num];

      /* Clamp so that an unexpectedly long line can never overrun the text
       * buffer, even when assertions are compiled out.
       */
      const size_t max_len = sizeof(inst->text) - 1;
      const size_t copy_len = len < max_len ? len : max_len;

      memcpy(inst->text, disasm, copy_len);
      inst->text[copy_len] = '\0';
      inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

      /* More than 16 chars after ";" means the instruction is 8 bytes long. */
      inst->size = next - semicolon > 16 ? 8 : 4;

      /* snprintf truncates the annotation if the line nearly fills the buffer. */
      snprintf(inst->text + copy_len, sizeof(inst->text) - copy_len,
               " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,
               inst->size);

      last_inst = inst;
      (*num)++;
      disasm = next + 1;
   }
}
298 
299 static void
radv_dump_annotated_shader(struct radv_shader_variant * shader,gl_shader_stage stage,struct ac_wave_info * waves,unsigned num_waves,FILE * f)300 radv_dump_annotated_shader(struct radv_shader_variant *shader, gl_shader_stage stage,
301                            struct ac_wave_info *waves, unsigned num_waves, FILE *f)
302 {
303    uint64_t start_addr, end_addr;
304    unsigned i;
305 
306    if (!shader)
307       return;
308 
309    start_addr = radv_shader_variant_get_va(shader);
310    end_addr = start_addr + shader->code_size;
311 
312    /* See if any wave executes the shader. */
313    for (i = 0; i < num_waves; i++) {
314       if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
315          break;
316    }
317 
318    if (i == num_waves)
319       return; /* the shader is not being executed */
320 
321    /* Remember the first found wave. The waves are sorted according to PC. */
322    waves = &waves[i];
323    num_waves -= i;
324 
325    /* Get the list of instructions.
326     * Buffer size / 4 is the upper bound of the instruction count.
327     */
328    unsigned num_inst = 0;
329    struct radv_shader_inst *instructions =
330       calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
331 
332    si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
333 
334    fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
335            radv_get_shader_name(&shader->info, stage));
336 
337    /* Print instructions with annotations. */
338    for (i = 0; i < num_inst; i++) {
339       struct radv_shader_inst *inst = &instructions[i];
340 
341       fprintf(f, "%s\n", inst->text);
342 
343       /* Print which waves execute the instruction right now. */
344       while (num_waves && start_addr + inst->offset == waves->pc) {
345          fprintf(f,
346                  "          " COLOR_GREEN "^ SE%u SH%u CU%u "
347                  "SIMD%u WAVE%u  EXEC=%016" PRIx64 "  ",
348                  waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
349 
350          if (inst->size == 4) {
351             fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
352          } else {
353             fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
354          }
355 
356          waves->matched = true;
357          waves = &waves[1];
358          num_waves--;
359       }
360    }
361 
362    fprintf(f, "\n\n");
363    free(instructions);
364 }
365 
366 static void
radv_dump_annotated_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,FILE * f)367 radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
368                             FILE *f)
369 {
370    struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
371    enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
372    unsigned num_waves = ac_get_wave_info(chip_class, waves);
373 
374    fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
375 
376    /* Dump annotated active graphics shaders. */
377    unsigned stages = active_stages;
378    while (stages) {
379       int stage = u_bit_scan(&stages);
380 
381       radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f);
382    }
383 
384    /* Print waves executing shaders that are not currently bound. */
385    unsigned i;
386    bool found = false;
387    for (i = 0; i < num_waves; i++) {
388       if (waves[i].matched)
389          continue;
390 
391       if (!found) {
392          fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
393          found = true;
394       }
395       fprintf(f,
396               "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016" PRIx64 "  INST=%08X %08X  PC=%" PRIx64
397               "\n",
398               waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
399               waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
400    }
401    if (found)
402       fprintf(f, "\n\n");
403 }
404 
405 static void
radv_dump_spirv(struct radv_shader_variant * shader,const char * sha1,const char * dump_dir)406 radv_dump_spirv(struct radv_shader_variant *shader, const char *sha1, const char *dump_dir)
407 {
408    char dump_path[512];
409    FILE *f;
410 
411    snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);
412 
413    f = fopen(dump_path, "w+");
414    if (f) {
415       fwrite(shader->spirv, shader->spirv_size, 1, f);
416       fclose(f);
417    }
418 }
419 
420 static void
radv_dump_shader(struct radv_pipeline * pipeline,struct radv_shader_variant * shader,gl_shader_stage stage,const char * dump_dir,FILE * f)421 radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader_variant *shader,
422                  gl_shader_stage stage, const char *dump_dir, FILE *f)
423 {
424    if (!shader)
425       return;
426 
427    fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
428 
429    if (shader->spirv) {
430       unsigned char sha1[21];
431       char sha1buf[41];
432 
433       _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
434       _mesa_sha1_format(sha1buf, sha1);
435 
436       fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
437       radv_dump_spirv(shader, sha1buf, dump_dir);
438    }
439 
440    if (shader->nir_string) {
441       fprintf(f, "NIR:\n%s\n", shader->nir_string);
442    }
443 
444    fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
445            shader->ir_string);
446    fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
447 
448    radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
449 }
450 
451 static void
radv_dump_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,const char * dump_dir,FILE * f)452 radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
453                   const char *dump_dir, FILE *f)
454 {
455    /* Dump active graphics shaders. */
456    unsigned stages = active_stages;
457    while (stages) {
458       int stage = u_bit_scan(&stages);
459 
460       radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f);
461    }
462 }
463 
464 static void
radv_dump_vertex_descriptors(struct radv_pipeline * pipeline,FILE * f)465 radv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f)
466 {
467    void *ptr = (uint64_t *)pipeline->device->trace_id_ptr;
468    uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask);
469    uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
470 
471    if (!count)
472       return;
473 
474    fprintf(f, "Num vertex %s: %d\n",
475            pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count);
476    for (uint32_t i = 0; i < count; i++) {
477       uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
478       uint64_t va = 0;
479 
480       va |= desc[0];
481       va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
482 
483       fprintf(f, "VBO#%d:\n", i);
484       fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
485       fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
486       fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
487    }
488 }
489 
490 static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device * device,enum ring_type ring)491 radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
492 {
493    uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
494    int offset = ring == RING_GFX ? 1 : 2;
495 
496    return *(struct radv_pipeline **)(ptr + offset);
497 }
498 
499 static void
radv_dump_queue_state(struct radv_queue * queue,const char * dump_dir,FILE * f)500 radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
501 {
502    enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
503    struct radv_pipeline *pipeline;
504 
505    fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
506 
507    pipeline = radv_get_saved_pipeline(queue->device, ring);
508    if (pipeline) {
509       radv_dump_shaders(pipeline, pipeline->active_stages, dump_dir, f);
510       if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
511          radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
512       radv_dump_vertex_descriptors(pipeline, f);
513       radv_dump_descriptors(queue->device, f);
514    }
515 }
516 
/**
 * Run the shell command "cmd" and copy its standard output to "f", followed
 * by one newline. Nothing is written when the command cannot be started.
 * This is a no-op on Windows.
 */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
   char chunk[2048];
   FILE *pipe = popen(cmd, "r");

   if (pipe == NULL)
      return;

   while (fgets(chunk, sizeof(chunk), pipe) != NULL)
      fputs(chunk, f);

   fprintf(f, "\n");
   pclose(pipe);
#endif
}
533 
/**
 * Write a header and the output of "dmesg | tail -n60" to "f".
 */
static void
radv_dump_dmesg(FILE *f)
{
   static const char dmesg_cmd[] = "dmesg | tail -n60";

   fprintf(f, "\nLast 60 lines of dmesg:\n\n");
   radv_dump_cmd(dmesg_cmd, f);
}
540 
541 void
radv_dump_enabled_options(struct radv_device * device,FILE * f)542 radv_dump_enabled_options(struct radv_device *device, FILE *f)
543 {
544    uint64_t mask;
545 
546    if (device->instance->debug_flags) {
547       fprintf(f, "Enabled debug options: ");
548 
549       mask = device->instance->debug_flags;
550       while (mask) {
551          int i = u_bit_scan64(&mask);
552          fprintf(f, "%s, ", radv_get_debug_option_name(i));
553       }
554       fprintf(f, "\n");
555    }
556 
557    if (device->instance->perftest_flags) {
558       fprintf(f, "Enabled perftest options: ");
559 
560       mask = device->instance->perftest_flags;
561       while (mask) {
562          int i = u_bit_scan64(&mask);
563          fprintf(f, "%s, ", radv_get_perftest_option_name(i));
564       }
565       fprintf(f, "\n");
566    }
567 }
568 
569 static void
radv_dump_app_info(struct radv_device * device,FILE * f)570 radv_dump_app_info(struct radv_device *device, FILE *f)
571 {
572    struct radv_instance *instance = device->instance;
573 
574    fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
575    fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
576    fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
577    fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
578    fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
579            VK_VERSION_MINOR(instance->vk.app_info.api_version),
580            VK_VERSION_PATCH(instance->vk.app_info.api_version));
581 
582    radv_dump_enabled_options(device, f);
583 }
584 
585 static void
radv_dump_device_name(struct radv_device * device,FILE * f)586 radv_dump_device_name(struct radv_device *device, FILE *f)
587 {
588    struct radeon_info *info = &device->physical_device->rad_info;
589 #ifndef _WIN32
590    char kernel_version[128] = {0};
591    struct utsname uname_data;
592 #endif
593    const char *chip_name;
594 
595    chip_name = device->ws->get_chip_name(device->ws);
596 
597 #ifdef _WIN32
598    fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name,
599            info->drm_major, info->drm_minor, info->drm_patchlevel);
600 #else
601    if (uname(&uname_data) == 0)
602       snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
603 
604    fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name,
605            info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);
606 #endif
607 }
608 
609 static void
radv_dump_umr_ring(struct radv_queue * queue,FILE * f)610 radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
611 {
612    enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
613    struct radv_device *device = queue->device;
614    char cmd[128];
615 
616    /* TODO: Dump compute ring. */
617    if (ring != RING_GFX)
618       return;
619 
620    sprintf(cmd, "umr -R %s 2>&1",
621            device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
622 
623    fprintf(f, "\nUMR GFX ring:\n\n");
624    radv_dump_cmd(cmd, f);
625 }
626 
627 static void
radv_dump_umr_waves(struct radv_queue * queue,FILE * f)628 radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
629 {
630    enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
631    struct radv_device *device = queue->device;
632    char cmd[128];
633 
634    /* TODO: Dump compute ring. */
635    if (ring != RING_GFX)
636       return;
637 
638    sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
639            device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
640 
641    fprintf(f, "\nUMR GFX waves:\n\n");
642    radv_dump_cmd(cmd, f);
643 }
644 
645 static bool
radv_gpu_hang_occured(struct radv_queue * queue,enum ring_type ring)646 radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
647 {
648    struct radeon_winsys *ws = queue->device->ws;
649 
650    if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
651       return true;
652 
653    return false;
654 }
655 
656 void
radv_check_gpu_hangs(struct radv_queue * queue,struct radeon_cmdbuf * cs)657 radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
658 {
659    struct radv_device *device = queue->device;
660    enum ring_type ring;
661    uint64_t addr;
662 
663    ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
664 
665    bool hang_occurred = radv_gpu_hang_occured(queue, ring);
666    bool vm_fault_occurred = false;
667    if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
668       vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
669                                               &device->dmesg_timestamp, &addr);
670    if (!hang_occurred && !vm_fault_occurred)
671       return;
672 
673    fprintf(stderr, "radv: GPU hang detected...\n");
674 
675 #ifndef _WIN32
676    /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
677     * various debugging info about that GPU hang.
678     */
679    struct tm *timep, result;
680    time_t raw_time;
681    FILE *f;
682    char dump_dir[256], dump_path[512], buf_time[128];
683 
684    time(&raw_time);
685    timep = os_localtime(&raw_time, &result);
686    strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
687 
688    snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),
689             getpid(), buf_time);
690    if (mkdir(dump_dir, 0774) && errno != EEXIST) {
691       fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
692       abort();
693    }
694 
695    fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
696 
697    /* Dump trace file. */
698    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
699    f = fopen(dump_path, "w+");
700    if (f) {
701       radv_dump_trace(queue->device, cs, f);
702       fclose(f);
703    }
704 
705    /* Dump pipeline state. */
706    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
707    f = fopen(dump_path, "w+");
708    if (f) {
709       radv_dump_queue_state(queue, dump_dir, f);
710       fclose(f);
711    }
712 
713    if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
714       /* Dump UMR ring. */
715       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
716       f = fopen(dump_path, "w+");
717       if (f) {
718          radv_dump_umr_ring(queue, f);
719          fclose(f);
720       }
721 
722       /* Dump UMR waves. */
723       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
724       f = fopen(dump_path, "w+");
725       if (f) {
726          radv_dump_umr_waves(queue, f);
727          fclose(f);
728       }
729    }
730 
731    /* Dump debug registers. */
732    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
733    f = fopen(dump_path, "w+");
734    if (f) {
735       radv_dump_debug_registers(device, f);
736       fclose(f);
737    }
738 
739    /* Dump BO ranges. */
740    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
741    f = fopen(dump_path, "w+");
742    if (f) {
743       device->ws->dump_bo_ranges(device->ws, f);
744       fclose(f);
745    }
746 
747    /* Dump BO log. */
748    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
749    f = fopen(dump_path, "w+");
750    if (f) {
751       device->ws->dump_bo_log(device->ws, f);
752       fclose(f);
753    }
754 
755    /* Dump VM fault info. */
756    if (vm_fault_occurred) {
757       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
758       f = fopen(dump_path, "w+");
759       if (f) {
760          fprintf(f, "VM fault report.\n\n");
761          fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);
762          fclose(f);
763       }
764    }
765 
766    /* Dump app info. */
767    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
768    f = fopen(dump_path, "w+");
769    if (f) {
770       radv_dump_app_info(device, f);
771       fclose(f);
772    }
773 
774    /* Dump GPU info. */
775    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
776    f = fopen(dump_path, "w+");
777    if (f) {
778       radv_dump_device_name(device, f);
779       ac_print_gpu_info(&device->physical_device->rad_info, f);
780       fclose(f);
781    }
782 
783    /* Dump dmesg. */
784    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
785    f = fopen(dump_path, "w+");
786    if (f) {
787       radv_dump_dmesg(f);
788       fclose(f);
789    }
790 #endif
791 
792    fprintf(stderr, "radv: GPU hang report saved successfully!\n");
793    abort();
794 }
795 
/**
 * Disassemble a SPIR-V binary with the external "spirv-dis" tool and write
 * the tool's output to "fp".
 *
 * The binary is first written to a temporary file under /tmp, which is
 * removed again before returning. Nothing is printed when the temporary file
 * cannot be created or written. This is a no-op on Windows.
 */
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
#ifndef _WIN32
   char tmp_path[] = "/tmp/fileXXXXXX";
   char command[128];

   /* Dump the binary into a temporary file. */
   int fd = mkstemp(tmp_path);
   if (fd < 0)
      return;

   if (write(fd, data, size) != -1) {
      /* Disassemble using spirv-dis if installed. */
      sprintf(command, "spirv-dis %s", tmp_path);
      radv_dump_cmd(command, fp);
   }

   close(fd);
   unlink(tmp_path);
#endif
}
821 
822 bool
radv_trap_handler_init(struct radv_device * device)823 radv_trap_handler_init(struct radv_device *device)
824 {
825    struct radeon_winsys *ws = device->ws;
826    VkResult result;
827 
828    /* Create the trap handler shader and upload it like other shaders. */
829    device->trap_handler_shader = radv_create_trap_handler_shader(device);
830    if (!device->trap_handler_shader) {
831       fprintf(stderr, "radv: failed to create the trap handler shader.\n");
832       return false;
833    }
834 
835    result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
836    if (result != VK_SUCCESS)
837       return false;
838 
839    result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
840                               RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
841                                  RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
842                               RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
843    if (result != VK_SUCCESS)
844       return false;
845 
846    result = ws->buffer_make_resident(ws, device->tma_bo, true);
847    if (result != VK_SUCCESS)
848       return false;
849 
850    device->tma_ptr = ws->buffer_map(device->tma_bo);
851    if (!device->tma_ptr)
852       return false;
853 
854    /* Upload a buffer descriptor to store various info from the trap. */
855    uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
856    uint32_t desc[4];
857 
858    desc[0] = tma_va;
859    desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
860    desc[2] = TMA_BO_SIZE;
861    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
862              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
863              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
864 
865    memcpy(device->tma_ptr, desc, sizeof(desc));
866 
867    return true;
868 }
869 
870 void
radv_trap_handler_finish(struct radv_device * device)871 radv_trap_handler_finish(struct radv_device *device)
872 {
873    struct radeon_winsys *ws = device->ws;
874 
875    if (unlikely(device->trap_handler_shader)) {
876       ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
877       radv_shader_variant_destroy(device, device->trap_handler_shader);
878    }
879 
880    if (unlikely(device->tma_bo)) {
881       ws->buffer_make_resident(ws, device->tma_bo, false);
882       ws->buffer_destroy(ws, device->tma_bo);
883    }
884 }
885 
886 static void
radv_dump_faulty_shader(struct radv_device * device,uint64_t faulty_pc)887 radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
888 {
889    struct radv_shader_variant *shader;
890    uint64_t start_addr, end_addr;
891    uint32_t instr_offset;
892 
893    shader = radv_find_shader_variant(device, faulty_pc);
894    if (!shader)
895       return;
896 
897    start_addr = radv_shader_variant_get_va(shader);
898    end_addr = start_addr + shader->code_size;
899    instr_offset = faulty_pc - start_addr;
900 
901    fprintf(stderr,
902            "Faulty shader found "
903            "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
904            start_addr, end_addr, instr_offset);
905 
906    /* Get the list of instructions.
907     * Buffer size / 4 is the upper bound of the instruction count.
908     */
909    unsigned num_inst = 0;
910    struct radv_shader_inst *instructions =
911       calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
912 
913    /* Split the disassembly string into instructions. */
914    si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
915 
916    /* Print instructions with annotations. */
917    for (unsigned i = 0; i < num_inst; i++) {
918       struct radv_shader_inst *inst = &instructions[i];
919 
920       if (start_addr + inst->offset == faulty_pc) {
921          fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
922          fprintf(stderr, "%s\n", inst->text);
923          fprintf(stderr, "\n");
924       } else {
925          fprintf(stderr, "%s\n", inst->text);
926       }
927    }
928 
929    free(instructions);
930 }
931 
/* SQ wave registers as radv_dump_sq_hw_regs() reads them from the mapped
 * TMA buffer, starting at dword 6. The field order is the in-memory order.
 */
struct radv_sq_hw_reg {
   uint32_t status;   /* dumped as SQ_WAVE_STATUS */
   uint32_t trap_sts; /* dumped as SQ_WAVE_TRAPSTS */
   uint32_t hw_id;    /* dumped as SQ_WAVE_HW_ID1 on GFX10+, SQ_WAVE_HW_ID otherwise */
   uint32_t ib_sts;   /* dumped as SQ_WAVE_IB_STS */
};
938 
939 static void
radv_dump_sq_hw_regs(struct radv_device * device)940 radv_dump_sq_hw_regs(struct radv_device *device)
941 {
942    struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
943 
944    fprintf(stderr, "\nHardware registers:\n");
945    if (device->physical_device->rad_info.chip_class >= GFX10) {
946       ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS,
947                   regs->status, ~0);
948       ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS,
949                   regs->trap_sts, ~0);
950       ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1,
951                   regs->hw_id, ~0);
952       ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS,
953                   regs->ib_sts, ~0);
954    } else {
955       ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS,
956                   regs->status, ~0);
957       ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS,
958                   regs->trap_sts, ~0);
959       ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID,
960                   regs->hw_id, ~0);
961       ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS,
962                   regs->ib_sts, ~0);
963    }
964    fprintf(stderr, "\n\n");
965 }
966 
967 void
radv_check_trap_handler(struct radv_queue * queue)968 radv_check_trap_handler(struct radv_queue *queue)
969 {
970    enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
971    struct radv_device *device = queue->device;
972    struct radeon_winsys *ws = device->ws;
973 
974    /* Wait for the context to be idle in a finite time. */
975    ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);
976 
977    /* Try to detect if the trap handler has been reached by the hw by
978     * looking at ttmp0 which should be non-zero if a shader exception
979     * happened.
980     */
981    if (!device->tma_ptr[4])
982       return;
983 
984 #if 0
985 	fprintf(stderr, "tma_ptr:\n");
986 	for (unsigned i = 0; i < 10; i++)
987 		fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
988 #endif
989 
990    radv_dump_sq_hw_regs(device);
991 
992    uint32_t ttmp0 = device->tma_ptr[4];
993    uint32_t ttmp1 = device->tma_ptr[5];
994 
995    /* According to the ISA docs, 3.10 Trap and Exception Registers:
996     *
997     * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
998     *
999     * "When the trap handler is entered, the PC of the faulting
1000     *  instruction is: (PC - PC_rewind * 4)."
1001     * */
1002    uint8_t trap_id = (ttmp1 >> 16) & 0xff;
1003    uint8_t ht = (ttmp1 >> 24) & 0x1;
1004    uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
1005    uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
1006 
1007    fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,
1008            pc_rewind);
1009 
1010    radv_dump_faulty_shader(device, pc);
1011 
1012    abort();
1013 }
1014