1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #ifndef _WIN32
31 #include <sys/utsname.h>
32 #endif
33 #include <sys/stat.h>
34
35 #include "util/mesa-sha1.h"
36 #include "ac_debug.h"
37 #include "radv_debug.h"
38 #include "radv_shader.h"
39 #include "sid.h"
40
/* Size in bytes passed to buffer_create() for the trace BO (radv_init_trace). */
#define TRACE_BO_SIZE 4096
/* Size in bytes passed to buffer_create() for the TMA BO
 * (radv_trap_handler_init); also written into the descriptor stored there. */
#define TMA_BO_SIZE 4096

/* Terminal escape sequences pasted into the format strings of the dump
 * helpers to colorize headings and annotations. */
#define COLOR_RESET "\033[0m"
#define COLOR_RED "\033[31m"
#define COLOR_GREEN "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN "\033[1;36m"

/* Prefix of the per-hang dump directory name built in radv_check_gpu_hangs(). */
#define RADV_DUMP_DIR "radv_dumps"
51
/* Trace BO layout (indices are in units of 4 bytes, i.e. dwords):
 *
 * [0]: primary trace ID
 * [1]: secondary trace ID
 * [2-3]: 64-bit GFX ring pipeline pointer
 * [4-5]: 64-bit COMPUTE ring pipeline pointer
 * [6-7]: 64-bit vertex descriptors pointer
 * [8-9]: 64-bit descriptor set #0 pointer
 * ...
 * [70-71]: 64-bit descriptor set #31 pointer
 */
63
64 bool
radv_init_trace(struct radv_device * device)65 radv_init_trace(struct radv_device *device)
66 {
67 struct radeon_winsys *ws = device->ws;
68 VkResult result;
69
70 result = ws->buffer_create(
71 ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
72 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
73 RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
74 if (result != VK_SUCCESS)
75 return false;
76
77 result = ws->buffer_make_resident(ws, device->trace_bo, true);
78 if (result != VK_SUCCESS)
79 return false;
80
81 device->trace_id_ptr = ws->buffer_map(device->trace_bo);
82 if (!device->trace_id_ptr)
83 return false;
84
85 ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp,
86 NULL);
87
88 return true;
89 }
90
91 void
radv_finish_trace(struct radv_device * device)92 radv_finish_trace(struct radv_device *device)
93 {
94 struct radeon_winsys *ws = device->ws;
95
96 if (unlikely(device->trace_bo)) {
97 ws->buffer_make_resident(ws, device->trace_bo, false);
98 ws->buffer_destroy(ws, device->trace_bo);
99 }
100 }
101
102 static void
radv_dump_trace(struct radv_device * device,struct radeon_cmdbuf * cs,FILE * f)103 radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
104 {
105 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
106 device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2);
107 }
108
109 static void
radv_dump_mmapped_reg(struct radv_device * device,FILE * f,unsigned offset)110 radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
111 {
112 struct radeon_winsys *ws = device->ws;
113 uint32_t value;
114
115 if (ws->read_registers(ws, offset, 1, &value))
116 ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0);
117 }
118
119 static void
radv_dump_debug_registers(struct radv_device * device,FILE * f)120 radv_dump_debug_registers(struct radv_device *device, FILE *f)
121 {
122 struct radeon_info *info = &device->physical_device->rad_info;
123
124 fprintf(f, "Memory-mapped registers:\n");
125 radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
126
127 radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
128 radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
129 radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
130 radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
131 radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
132 radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
133 radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
134 if (info->chip_class <= GFX8) {
135 radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
136 radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
137 radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
138 }
139 radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
140 radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
141 radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
142 radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
143 radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
144 radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
145 radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
146 radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
147 radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
148 radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
149 fprintf(f, "\n");
150 }
151
152 static void
radv_dump_buffer_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)153 radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
154 {
155 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
156 for (unsigned j = 0; j < 4; j++)
157 ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
158 }
159
160 static void
radv_dump_image_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)161 radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
162 {
163 unsigned sq_img_rsrc_word0 =
164 chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
165
166 fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
167 for (unsigned j = 0; j < 8; j++)
168 ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
169
170 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
171 for (unsigned j = 0; j < 8; j++)
172 ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
173 }
174
175 static void
radv_dump_sampler_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)176 radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
177 {
178 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
179 for (unsigned j = 0; j < 4; j++) {
180 ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
181 }
182 }
183
184 static void
radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)185 radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
186 FILE *f)
187 {
188 radv_dump_image_descriptor(chip_class, desc, f);
189 radv_dump_sampler_descriptor(chip_class, desc + 16, f);
190 }
191
192 static void
radv_dump_descriptor_set(struct radv_device * device,struct radv_descriptor_set * set,unsigned id,FILE * f)193 radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,
194 FILE *f)
195 {
196 enum chip_class chip_class = device->physical_device->rad_info.chip_class;
197 const struct radv_descriptor_set_layout *layout;
198 int i;
199
200 if (!set)
201 return;
202 layout = set->header.layout;
203
204 for (i = 0; i < set->header.layout->binding_count; i++) {
205 uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
206
207 switch (layout->binding[i].type) {
208 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
209 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
210 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
211 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
212 radv_dump_buffer_descriptor(chip_class, desc, f);
213 break;
214 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
215 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
216 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
217 radv_dump_image_descriptor(chip_class, desc, f);
218 break;
219 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
220 radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
221 break;
222 case VK_DESCRIPTOR_TYPE_SAMPLER:
223 radv_dump_sampler_descriptor(chip_class, desc, f);
224 break;
225 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
226 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
227 case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
228 case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
229 /* todo */
230 break;
231 default:
232 assert(!"unknown descriptor type");
233 break;
234 }
235 fprintf(f, "\n");
236 }
237 fprintf(f, "\n\n");
238 }
239
240 static void
radv_dump_descriptors(struct radv_device * device,FILE * f)241 radv_dump_descriptors(struct radv_device *device, FILE *f)
242 {
243 uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
244 int i;
245
246 fprintf(f, "Descriptors:\n");
247 for (i = 0; i < MAX_SETS; i++) {
248 struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 4);
249
250 radv_dump_descriptor_set(device, set, i, f);
251 }
252 }
253
struct radv_shader_inst {
   char text[160];  /* one disasm line, plus the appended annotation */
   unsigned offset; /* instruction offset */
   unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * Only newline-terminated lines containing a ';' are treated as
 * instructions; everything else is skipped. For each instruction the line
 * text is copied, its byte offset is derived from the previous entry
 * (offset + size, or 0 for the first one), and its size is guessed from the
 * amount of text following the ';'. A " [PC=..., off=..., size=...]" suffix
 * is then appended as far as space allows.
 *
 * disasm:       NUL-terminated disassembly text.
 * start_addr:   address of the first instruction, used for the PC suffix.
 * num:          in/out count of valid entries in "instructions"; new
 *               entries are appended starting at index *num.
 * instructions: output array. Its capacity is not passed in, so the caller
 *               must size it for every instruction line in "disasm".
 *
 * Lines that do not fit in "text" are truncated rather than overflowing the
 * buffer.
 */
static void
si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,
                    struct radv_shader_inst *instructions)
{
   struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
   const char *next;

   while ((next = strchr(disasm, '\n'))) {
      struct radv_shader_inst *inst = &instructions[*num];
      size_t len = (size_t)(next - disasm);
      const char *semicolon = memchr(disasm, ';', len);

      if (!semicolon) {
         /* Ignore everything that is not an instruction. */
         disasm = next + 1;
         continue;
      }

      /* Never copy more than the buffer can hold (keeping room for the
       * terminator). Previously only an assert() guarded this, so builds
       * with asserts disabled could write past the end of "text".
       */
      size_t copy_len = len;
      if (copy_len > sizeof(inst->text) - 1)
         copy_len = sizeof(inst->text) - 1;

      memcpy(inst->text, disasm, copy_len);
      inst->text[copy_len] = '\0';
      inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

      /* More than 16 chars after ";" means the instruction is 8 bytes long. */
      inst->size = next - semicolon > 16 ? 8 : 4;

      /* snprintf() truncates the annotation if the line left too little room. */
      snprintf(inst->text + copy_len, sizeof(inst->text) - copy_len,
               " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,
               inst->size);

      last_inst = inst;
      (*num)++;
      disasm = next + 1;
   }
}
298
299 static void
radv_dump_annotated_shader(struct radv_shader_variant * shader,gl_shader_stage stage,struct ac_wave_info * waves,unsigned num_waves,FILE * f)300 radv_dump_annotated_shader(struct radv_shader_variant *shader, gl_shader_stage stage,
301 struct ac_wave_info *waves, unsigned num_waves, FILE *f)
302 {
303 uint64_t start_addr, end_addr;
304 unsigned i;
305
306 if (!shader)
307 return;
308
309 start_addr = radv_shader_variant_get_va(shader);
310 end_addr = start_addr + shader->code_size;
311
312 /* See if any wave executes the shader. */
313 for (i = 0; i < num_waves; i++) {
314 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
315 break;
316 }
317
318 if (i == num_waves)
319 return; /* the shader is not being executed */
320
321 /* Remember the first found wave. The waves are sorted according to PC. */
322 waves = &waves[i];
323 num_waves -= i;
324
325 /* Get the list of instructions.
326 * Buffer size / 4 is the upper bound of the instruction count.
327 */
328 unsigned num_inst = 0;
329 struct radv_shader_inst *instructions =
330 calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
331
332 si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
333
334 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
335 radv_get_shader_name(&shader->info, stage));
336
337 /* Print instructions with annotations. */
338 for (i = 0; i < num_inst; i++) {
339 struct radv_shader_inst *inst = &instructions[i];
340
341 fprintf(f, "%s\n", inst->text);
342
343 /* Print which waves execute the instruction right now. */
344 while (num_waves && start_addr + inst->offset == waves->pc) {
345 fprintf(f,
346 " " COLOR_GREEN "^ SE%u SH%u CU%u "
347 "SIMD%u WAVE%u EXEC=%016" PRIx64 " ",
348 waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
349
350 if (inst->size == 4) {
351 fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
352 } else {
353 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
354 }
355
356 waves->matched = true;
357 waves = &waves[1];
358 num_waves--;
359 }
360 }
361
362 fprintf(f, "\n\n");
363 free(instructions);
364 }
365
366 static void
radv_dump_annotated_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,FILE * f)367 radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
368 FILE *f)
369 {
370 struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
371 enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
372 unsigned num_waves = ac_get_wave_info(chip_class, waves);
373
374 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
375
376 /* Dump annotated active graphics shaders. */
377 unsigned stages = active_stages;
378 while (stages) {
379 int stage = u_bit_scan(&stages);
380
381 radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f);
382 }
383
384 /* Print waves executing shaders that are not currently bound. */
385 unsigned i;
386 bool found = false;
387 for (i = 0; i < num_waves; i++) {
388 if (waves[i].matched)
389 continue;
390
391 if (!found) {
392 fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
393 found = true;
394 }
395 fprintf(f,
396 " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64
397 "\n",
398 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
399 waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
400 }
401 if (found)
402 fprintf(f, "\n\n");
403 }
404
405 static void
radv_dump_spirv(struct radv_shader_variant * shader,const char * sha1,const char * dump_dir)406 radv_dump_spirv(struct radv_shader_variant *shader, const char *sha1, const char *dump_dir)
407 {
408 char dump_path[512];
409 FILE *f;
410
411 snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);
412
413 f = fopen(dump_path, "w+");
414 if (f) {
415 fwrite(shader->spirv, shader->spirv_size, 1, f);
416 fclose(f);
417 }
418 }
419
420 static void
radv_dump_shader(struct radv_pipeline * pipeline,struct radv_shader_variant * shader,gl_shader_stage stage,const char * dump_dir,FILE * f)421 radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader_variant *shader,
422 gl_shader_stage stage, const char *dump_dir, FILE *f)
423 {
424 if (!shader)
425 return;
426
427 fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
428
429 if (shader->spirv) {
430 unsigned char sha1[21];
431 char sha1buf[41];
432
433 _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
434 _mesa_sha1_format(sha1buf, sha1);
435
436 fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
437 radv_dump_spirv(shader, sha1buf, dump_dir);
438 }
439
440 if (shader->nir_string) {
441 fprintf(f, "NIR:\n%s\n", shader->nir_string);
442 }
443
444 fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
445 shader->ir_string);
446 fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
447
448 radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
449 }
450
451 static void
radv_dump_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,const char * dump_dir,FILE * f)452 radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
453 const char *dump_dir, FILE *f)
454 {
455 /* Dump active graphics shaders. */
456 unsigned stages = active_stages;
457 while (stages) {
458 int stage = u_bit_scan(&stages);
459
460 radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f);
461 }
462 }
463
464 static void
radv_dump_vertex_descriptors(struct radv_pipeline * pipeline,FILE * f)465 radv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f)
466 {
467 void *ptr = (uint64_t *)pipeline->device->trace_id_ptr;
468 uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask);
469 uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
470
471 if (!count)
472 return;
473
474 fprintf(f, "Num vertex %s: %d\n",
475 pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count);
476 for (uint32_t i = 0; i < count; i++) {
477 uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
478 uint64_t va = 0;
479
480 va |= desc[0];
481 va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
482
483 fprintf(f, "VBO#%d:\n", i);
484 fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
485 fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
486 fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
487 }
488 }
489
490 static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device * device,enum ring_type ring)491 radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
492 {
493 uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
494 int offset = ring == RING_GFX ? 1 : 2;
495
496 return *(struct radv_pipeline **)(ptr + offset);
497 }
498
499 static void
radv_dump_queue_state(struct radv_queue * queue,const char * dump_dir,FILE * f)500 radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
501 {
502 enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
503 struct radv_pipeline *pipeline;
504
505 fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
506
507 pipeline = radv_get_saved_pipeline(queue->device, ring);
508 if (pipeline) {
509 radv_dump_shaders(pipeline, pipeline->active_stages, dump_dir, f);
510 if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
511 radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
512 radv_dump_vertex_descriptors(pipeline, f);
513 radv_dump_descriptors(queue->device, f);
514 }
515 }
516
/* Run "cmd" through popen() and copy its output to "f", followed by one
 * extra newline. Nothing is written if popen() fails. The function is a
 * no-op when built for _WIN32.
 */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
   char chunk[2048];
   FILE *child = popen(cmd, "r");

   if (!child)
      return;

   for (;;) {
      if (!fgets(chunk, sizeof(chunk), child))
         break;
      fputs(chunk, f);
   }
   fprintf(f, "\n");
   pclose(child);
#endif
}
533
/* Write a heading and the output of "dmesg | tail -n60" to "f". */
static void
radv_dump_dmesg(FILE *f)
{
   static const char dmesg_cmd[] = "dmesg | tail -n60";

   fputs("\nLast 60 lines of dmesg:\n\n", f);
   radv_dump_cmd(dmesg_cmd, f);
}
540
541 void
radv_dump_enabled_options(struct radv_device * device,FILE * f)542 radv_dump_enabled_options(struct radv_device *device, FILE *f)
543 {
544 uint64_t mask;
545
546 if (device->instance->debug_flags) {
547 fprintf(f, "Enabled debug options: ");
548
549 mask = device->instance->debug_flags;
550 while (mask) {
551 int i = u_bit_scan64(&mask);
552 fprintf(f, "%s, ", radv_get_debug_option_name(i));
553 }
554 fprintf(f, "\n");
555 }
556
557 if (device->instance->perftest_flags) {
558 fprintf(f, "Enabled perftest options: ");
559
560 mask = device->instance->perftest_flags;
561 while (mask) {
562 int i = u_bit_scan64(&mask);
563 fprintf(f, "%s, ", radv_get_perftest_option_name(i));
564 }
565 fprintf(f, "\n");
566 }
567 }
568
569 static void
radv_dump_app_info(struct radv_device * device,FILE * f)570 radv_dump_app_info(struct radv_device *device, FILE *f)
571 {
572 struct radv_instance *instance = device->instance;
573
574 fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
575 fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
576 fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
577 fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
578 fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
579 VK_VERSION_MINOR(instance->vk.app_info.api_version),
580 VK_VERSION_PATCH(instance->vk.app_info.api_version));
581
582 radv_dump_enabled_options(device, f);
583 }
584
585 static void
radv_dump_device_name(struct radv_device * device,FILE * f)586 radv_dump_device_name(struct radv_device *device, FILE *f)
587 {
588 struct radeon_info *info = &device->physical_device->rad_info;
589 #ifndef _WIN32
590 char kernel_version[128] = {0};
591 struct utsname uname_data;
592 #endif
593 const char *chip_name;
594
595 chip_name = device->ws->get_chip_name(device->ws);
596
597 #ifdef _WIN32
598 fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name,
599 info->drm_major, info->drm_minor, info->drm_patchlevel);
600 #else
601 if (uname(&uname_data) == 0)
602 snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
603
604 fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name,
605 info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);
606 #endif
607 }
608
609 static void
radv_dump_umr_ring(struct radv_queue * queue,FILE * f)610 radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
611 {
612 enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
613 struct radv_device *device = queue->device;
614 char cmd[128];
615
616 /* TODO: Dump compute ring. */
617 if (ring != RING_GFX)
618 return;
619
620 sprintf(cmd, "umr -R %s 2>&1",
621 device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
622
623 fprintf(f, "\nUMR GFX ring:\n\n");
624 radv_dump_cmd(cmd, f);
625 }
626
627 static void
radv_dump_umr_waves(struct radv_queue * queue,FILE * f)628 radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
629 {
630 enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
631 struct radv_device *device = queue->device;
632 char cmd[128];
633
634 /* TODO: Dump compute ring. */
635 if (ring != RING_GFX)
636 return;
637
638 sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
639 device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
640
641 fprintf(f, "\nUMR GFX waves:\n\n");
642 radv_dump_cmd(cmd, f);
643 }
644
645 static bool
radv_gpu_hang_occured(struct radv_queue * queue,enum ring_type ring)646 radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
647 {
648 struct radeon_winsys *ws = queue->device->ws;
649
650 if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
651 return true;
652
653 return false;
654 }
655
656 void
radv_check_gpu_hangs(struct radv_queue * queue,struct radeon_cmdbuf * cs)657 radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
658 {
659 struct radv_device *device = queue->device;
660 enum ring_type ring;
661 uint64_t addr;
662
663 ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
664
665 bool hang_occurred = radv_gpu_hang_occured(queue, ring);
666 bool vm_fault_occurred = false;
667 if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
668 vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
669 &device->dmesg_timestamp, &addr);
670 if (!hang_occurred && !vm_fault_occurred)
671 return;
672
673 fprintf(stderr, "radv: GPU hang detected...\n");
674
675 #ifndef _WIN32
676 /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
677 * various debugging info about that GPU hang.
678 */
679 struct tm *timep, result;
680 time_t raw_time;
681 FILE *f;
682 char dump_dir[256], dump_path[512], buf_time[128];
683
684 time(&raw_time);
685 timep = os_localtime(&raw_time, &result);
686 strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
687
688 snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),
689 getpid(), buf_time);
690 if (mkdir(dump_dir, 0774) && errno != EEXIST) {
691 fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
692 abort();
693 }
694
695 fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
696
697 /* Dump trace file. */
698 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
699 f = fopen(dump_path, "w+");
700 if (f) {
701 radv_dump_trace(queue->device, cs, f);
702 fclose(f);
703 }
704
705 /* Dump pipeline state. */
706 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
707 f = fopen(dump_path, "w+");
708 if (f) {
709 radv_dump_queue_state(queue, dump_dir, f);
710 fclose(f);
711 }
712
713 if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
714 /* Dump UMR ring. */
715 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
716 f = fopen(dump_path, "w+");
717 if (f) {
718 radv_dump_umr_ring(queue, f);
719 fclose(f);
720 }
721
722 /* Dump UMR waves. */
723 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
724 f = fopen(dump_path, "w+");
725 if (f) {
726 radv_dump_umr_waves(queue, f);
727 fclose(f);
728 }
729 }
730
731 /* Dump debug registers. */
732 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
733 f = fopen(dump_path, "w+");
734 if (f) {
735 radv_dump_debug_registers(device, f);
736 fclose(f);
737 }
738
739 /* Dump BO ranges. */
740 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
741 f = fopen(dump_path, "w+");
742 if (f) {
743 device->ws->dump_bo_ranges(device->ws, f);
744 fclose(f);
745 }
746
747 /* Dump BO log. */
748 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
749 f = fopen(dump_path, "w+");
750 if (f) {
751 device->ws->dump_bo_log(device->ws, f);
752 fclose(f);
753 }
754
755 /* Dump VM fault info. */
756 if (vm_fault_occurred) {
757 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
758 f = fopen(dump_path, "w+");
759 if (f) {
760 fprintf(f, "VM fault report.\n\n");
761 fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);
762 fclose(f);
763 }
764 }
765
766 /* Dump app info. */
767 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
768 f = fopen(dump_path, "w+");
769 if (f) {
770 radv_dump_app_info(device, f);
771 fclose(f);
772 }
773
774 /* Dump GPU info. */
775 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
776 f = fopen(dump_path, "w+");
777 if (f) {
778 radv_dump_device_name(device, f);
779 ac_print_gpu_info(&device->physical_device->rad_info, f);
780 fclose(f);
781 }
782
783 /* Dump dmesg. */
784 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
785 f = fopen(dump_path, "w+");
786 if (f) {
787 radv_dump_dmesg(f);
788 fclose(f);
789 }
790 #endif
791
792 fprintf(stderr, "radv: GPU hang report saved successfully!\n");
793 abort();
794 }
795
/* Disassemble a SPIR-V blob to "fp" by writing it to a mkstemp() file under
 * /tmp and running "spirv-dis" on it. The temporary file is closed and
 * unlinked whether or not the write succeeded. Nothing happens if the
 * temporary file cannot be created, and the function is a no-op when built
 * for _WIN32.
 */
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
#ifndef _WIN32
   char tmp_path[] = "/tmp/fileXXXXXX";
   char command[128];
   int fd = mkstemp(tmp_path);

   if (fd < 0)
      return;

   /* Dump the binary into the temporary file, then disassemble it using
    * spirv-dis if installed.
    */
   if (write(fd, data, size) != -1) {
      sprintf(command, "spirv-dis %s", tmp_path);
      radv_dump_cmd(command, fp);
   }

   close(fd);
   unlink(tmp_path);
#endif
}
821
822 bool
radv_trap_handler_init(struct radv_device * device)823 radv_trap_handler_init(struct radv_device *device)
824 {
825 struct radeon_winsys *ws = device->ws;
826 VkResult result;
827
828 /* Create the trap handler shader and upload it like other shaders. */
829 device->trap_handler_shader = radv_create_trap_handler_shader(device);
830 if (!device->trap_handler_shader) {
831 fprintf(stderr, "radv: failed to create the trap handler shader.\n");
832 return false;
833 }
834
835 result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
836 if (result != VK_SUCCESS)
837 return false;
838
839 result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
840 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
841 RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
842 RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
843 if (result != VK_SUCCESS)
844 return false;
845
846 result = ws->buffer_make_resident(ws, device->tma_bo, true);
847 if (result != VK_SUCCESS)
848 return false;
849
850 device->tma_ptr = ws->buffer_map(device->tma_bo);
851 if (!device->tma_ptr)
852 return false;
853
854 /* Upload a buffer descriptor to store various info from the trap. */
855 uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
856 uint32_t desc[4];
857
858 desc[0] = tma_va;
859 desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
860 desc[2] = TMA_BO_SIZE;
861 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
862 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
863 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
864
865 memcpy(device->tma_ptr, desc, sizeof(desc));
866
867 return true;
868 }
869
870 void
radv_trap_handler_finish(struct radv_device * device)871 radv_trap_handler_finish(struct radv_device *device)
872 {
873 struct radeon_winsys *ws = device->ws;
874
875 if (unlikely(device->trap_handler_shader)) {
876 ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
877 radv_shader_variant_destroy(device, device->trap_handler_shader);
878 }
879
880 if (unlikely(device->tma_bo)) {
881 ws->buffer_make_resident(ws, device->tma_bo, false);
882 ws->buffer_destroy(ws, device->tma_bo);
883 }
884 }
885
886 static void
radv_dump_faulty_shader(struct radv_device * device,uint64_t faulty_pc)887 radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
888 {
889 struct radv_shader_variant *shader;
890 uint64_t start_addr, end_addr;
891 uint32_t instr_offset;
892
893 shader = radv_find_shader_variant(device, faulty_pc);
894 if (!shader)
895 return;
896
897 start_addr = radv_shader_variant_get_va(shader);
898 end_addr = start_addr + shader->code_size;
899 instr_offset = faulty_pc - start_addr;
900
901 fprintf(stderr,
902 "Faulty shader found "
903 "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
904 start_addr, end_addr, instr_offset);
905
906 /* Get the list of instructions.
907 * Buffer size / 4 is the upper bound of the instruction count.
908 */
909 unsigned num_inst = 0;
910 struct radv_shader_inst *instructions =
911 calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
912
913 /* Split the disassembly string into instructions. */
914 si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
915
916 /* Print instructions with annotations. */
917 for (unsigned i = 0; i < num_inst; i++) {
918 struct radv_shader_inst *inst = &instructions[i];
919
920 if (start_addr + inst->offset == faulty_pc) {
921 fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
922 fprintf(stderr, "%s\n", inst->text);
923 fprintf(stderr, "\n");
924 } else {
925 fprintf(stderr, "%s\n", inst->text);
926 }
927 }
928
929 free(instructions);
930 }
931
/* Four consecutive dwords that radv_dump_sq_hw_regs() reads from the TMA
 * buffer starting at tma_ptr[6]. The field order is therefore
 * layout-significant and must not be changed.
 */
struct radv_sq_hw_reg {
   uint32_t status;   /* dumped as SQ_WAVE_STATUS */
   uint32_t trap_sts; /* dumped as SQ_WAVE_TRAPSTS */
   uint32_t hw_id;    /* dumped as SQ_WAVE_HW_ID1 (GFX10+) or SQ_WAVE_HW_ID */
   uint32_t ib_sts;   /* dumped as SQ_WAVE_IB_STS */
};
938
939 static void
radv_dump_sq_hw_regs(struct radv_device * device)940 radv_dump_sq_hw_regs(struct radv_device *device)
941 {
942 struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
943
944 fprintf(stderr, "\nHardware registers:\n");
945 if (device->physical_device->rad_info.chip_class >= GFX10) {
946 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS,
947 regs->status, ~0);
948 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS,
949 regs->trap_sts, ~0);
950 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1,
951 regs->hw_id, ~0);
952 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS,
953 regs->ib_sts, ~0);
954 } else {
955 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS,
956 regs->status, ~0);
957 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS,
958 regs->trap_sts, ~0);
959 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID,
960 regs->hw_id, ~0);
961 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS,
962 regs->ib_sts, ~0);
963 }
964 fprintf(stderr, "\n\n");
965 }
966
967 void
radv_check_trap_handler(struct radv_queue * queue)968 radv_check_trap_handler(struct radv_queue *queue)
969 {
970 enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
971 struct radv_device *device = queue->device;
972 struct radeon_winsys *ws = device->ws;
973
974 /* Wait for the context to be idle in a finite time. */
975 ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);
976
977 /* Try to detect if the trap handler has been reached by the hw by
978 * looking at ttmp0 which should be non-zero if a shader exception
979 * happened.
980 */
981 if (!device->tma_ptr[4])
982 return;
983
984 #if 0
985 fprintf(stderr, "tma_ptr:\n");
986 for (unsigned i = 0; i < 10; i++)
987 fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
988 #endif
989
990 radv_dump_sq_hw_regs(device);
991
992 uint32_t ttmp0 = device->tma_ptr[4];
993 uint32_t ttmp1 = device->tma_ptr[5];
994
995 /* According to the ISA docs, 3.10 Trap and Exception Registers:
996 *
997 * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
998 *
999 * "When the trap handler is entered, the PC of the faulting
1000 * instruction is: (PC - PC_rewind * 4)."
1001 * */
1002 uint8_t trap_id = (ttmp1 >> 16) & 0xff;
1003 uint8_t ht = (ttmp1 >> 24) & 0x1;
1004 uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
1005 uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
1006
1007 fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,
1008 pc_rewind);
1009
1010 radv_dump_faulty_shader(device, pc);
1011
1012 abort();
1013 }
1014