1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #ifndef _WIN32
31 #include <sys/utsname.h>
32 #endif
33 #include <sys/stat.h>
34
35 #include "util/mesa-sha1.h"
36 #include "ac_debug.h"
37 #include "radv_debug.h"
38 #include "radv_shader.h"
39 #include "sid.h"
40
/* Size in bytes of the trace BO created by radv_init_trace(). */
#define TRACE_BO_SIZE 4096
/* Size in bytes of the TMA BO created by radv_trap_handler_init(). */
#define TMA_BO_SIZE 4096

/* ANSI terminal escape sequences used to colorize parts of the dumps. */
#define COLOR_RESET "\033[0m"
#define COLOR_RED "\033[31m"
#define COLOR_GREEN "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN "\033[1;36m"

/* Prefix of the hang report directory name built in radv_check_gpu_hangs(). */
#define RADV_DUMP_DIR "radv_dumps"
51
52 /* Trace BO layout (offsets are 4 bytes):
53 *
54 * [0]: primary trace ID
55 * [1]: secondary trace ID
56 * [2-3]: 64-bit GFX ring pipeline pointer
57 * [4-5]: 64-bit COMPUTE ring pipeline pointer
58 * [6-7]: Vertex descriptors pointer
59 * [8-9]: 64-bit Vertex prolog pointer
60 * [10-11]: 64-bit descriptor set #0 pointer
61 * ...
62 * [72-73]: 64-bit descriptor set #31 pointer
63 */
64
65 bool
radv_init_trace(struct radv_device * device)66 radv_init_trace(struct radv_device *device)
67 {
68 struct radeon_winsys *ws = device->ws;
69 VkResult result;
70
71 result = ws->buffer_create(
72 ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
73 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
74 RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
75 if (result != VK_SUCCESS)
76 return false;
77
78 result = ws->buffer_make_resident(ws, device->trace_bo, true);
79 if (result != VK_SUCCESS)
80 return false;
81
82 device->trace_id_ptr = ws->buffer_map(device->trace_bo);
83 if (!device->trace_id_ptr)
84 return false;
85
86 ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp,
87 NULL);
88
89 return true;
90 }
91
92 void
radv_finish_trace(struct radv_device * device)93 radv_finish_trace(struct radv_device *device)
94 {
95 struct radeon_winsys *ws = device->ws;
96
97 if (unlikely(device->trace_bo)) {
98 ws->buffer_make_resident(ws, device->trace_bo, false);
99 ws->buffer_destroy(ws, device->trace_bo);
100 }
101 }
102
103 static void
radv_dump_trace(struct radv_device * device,struct radeon_cmdbuf * cs,FILE * f)104 radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
105 {
106 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
107 device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2);
108 }
109
110 static void
radv_dump_mmapped_reg(struct radv_device * device,FILE * f,unsigned offset)111 radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
112 {
113 struct radeon_winsys *ws = device->ws;
114 uint32_t value;
115
116 if (ws->read_registers(ws, offset, 1, &value))
117 ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0);
118 }
119
120 static void
radv_dump_debug_registers(struct radv_device * device,FILE * f)121 radv_dump_debug_registers(struct radv_device *device, FILE *f)
122 {
123 struct radeon_info *info = &device->physical_device->rad_info;
124
125 fprintf(f, "Memory-mapped registers:\n");
126 radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
127
128 radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
129 radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
130 radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
131 radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
132 radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
133 radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
134 radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
135 if (info->chip_class <= GFX8) {
136 radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
137 radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
138 radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
139 }
140 radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
141 radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
142 radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
143 radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
144 radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
145 radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
146 radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
147 radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
148 radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
149 radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
150 fprintf(f, "\n");
151 }
152
153 static void
radv_dump_buffer_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)154 radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
155 {
156 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
157 for (unsigned j = 0; j < 4; j++)
158 ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
159 }
160
161 static void
radv_dump_image_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)162 radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
163 {
164 unsigned sq_img_rsrc_word0 =
165 chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
166
167 fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
168 for (unsigned j = 0; j < 8; j++)
169 ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
170
171 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
172 for (unsigned j = 0; j < 8; j++)
173 ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
174 }
175
176 static void
radv_dump_sampler_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)177 radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
178 {
179 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
180 for (unsigned j = 0; j < 4; j++) {
181 ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
182 }
183 }
184
185 static void
radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)186 radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
187 FILE *f)
188 {
189 radv_dump_image_descriptor(chip_class, desc, f);
190 radv_dump_sampler_descriptor(chip_class, desc + 16, f);
191 }
192
193 static void
radv_dump_descriptor_set(struct radv_device * device,struct radv_descriptor_set * set,unsigned id,FILE * f)194 radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,
195 FILE *f)
196 {
197 enum chip_class chip_class = device->physical_device->rad_info.chip_class;
198 const struct radv_descriptor_set_layout *layout;
199 int i;
200
201 if (!set)
202 return;
203 layout = set->header.layout;
204
205 for (i = 0; i < set->header.layout->binding_count; i++) {
206 uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
207
208 switch (layout->binding[i].type) {
209 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
210 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
211 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
212 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
213 radv_dump_buffer_descriptor(chip_class, desc, f);
214 break;
215 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
216 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
217 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
218 radv_dump_image_descriptor(chip_class, desc, f);
219 break;
220 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
221 radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
222 break;
223 case VK_DESCRIPTOR_TYPE_SAMPLER:
224 radv_dump_sampler_descriptor(chip_class, desc, f);
225 break;
226 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
227 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
228 case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
229 case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
230 /* todo */
231 break;
232 default:
233 assert(!"unknown descriptor type");
234 break;
235 }
236 fprintf(f, "\n");
237 }
238 fprintf(f, "\n\n");
239 }
240
241 static void
radv_dump_descriptors(struct radv_device * device,FILE * f)242 radv_dump_descriptors(struct radv_device *device, FILE *f)
243 {
244 uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
245 int i;
246
247 fprintf(f, "Descriptors:\n");
248 for (i = 0; i < MAX_SETS; i++) {
249 struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 5);
250
251 radv_dump_descriptor_set(device, set, i, f);
252 }
253 }
254
/* One instruction of a shader disassembly. */
struct radv_shader_inst {
   char text[160];  /* one disasm line, followed by a "[PC=..., off=..., size=...]" note */
   unsigned offset; /* instruction offset */
   unsigned size;   /* instruction size = 4 or 8 */
};

/**
 * Split a disassembly string into lines and append one entry per instruction
 * to the array pointed to by "instructions".
 *
 * Only newline-terminated lines that contain a ';' are treated as
 * instructions; everything else is skipped. The offset of an instruction is
 * the offset plus size of the previous entry (0 for the first one).
 *
 * Lines that do not fit into radv_shader_inst::text are truncated instead of
 * overflowing the buffer.
 *
 * \param disasm        NUL-terminated disassembly text.
 * \param start_addr    Address of the first instruction, used for the PC note.
 * \param num           In: number of entries already in \p instructions.
 *                      Out: number of entries after appending.
 * \param instructions  Destination array. The function has no way to check
 *                      its capacity, so the caller must make it large enough
 *                      for every instruction line in \p disasm.
 */
static void
si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,
                    struct radv_shader_inst *instructions)
{
   struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
   const char *next;

   while ((next = strchr(disasm, '\n'))) {
      struct radv_shader_inst *inst = &instructions[*num];
      const size_t len = (size_t)(next - disasm);
      const char *semicolon = memchr(disasm, ';', len);

      if (!semicolon) {
         /* Ignore everything that is not an instruction. */
         disasm = next + 1;
         continue;
      }

      /* Clamp the copy: an assert() alone would not protect release builds. */
      const size_t text_len = len < sizeof(inst->text) ? len : sizeof(inst->text) - 1;

      memcpy(inst->text, disasm, text_len);
      inst->text[text_len] = '\0';
      inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

      /* More than 16 chars after ";" means the instruction is 8 bytes long. */
      inst->size = next - semicolon > 16 ? 8 : 4;

      /* snprintf() truncates the note if the line left too little room. */
      snprintf(inst->text + text_len, sizeof(inst->text) - text_len,
               " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,
               inst->size);

      last_inst = inst;
      (*num)++;
      disasm = next + 1;
   }
}
299
300 static void
radv_dump_annotated_shader(struct radv_shader * shader,gl_shader_stage stage,struct ac_wave_info * waves,unsigned num_waves,FILE * f)301 radv_dump_annotated_shader(struct radv_shader *shader, gl_shader_stage stage,
302 struct ac_wave_info *waves, unsigned num_waves, FILE *f)
303 {
304 uint64_t start_addr, end_addr;
305 unsigned i;
306
307 if (!shader)
308 return;
309
310 start_addr = radv_shader_get_va(shader);
311 end_addr = start_addr + shader->code_size;
312
313 /* See if any wave executes the shader. */
314 for (i = 0; i < num_waves; i++) {
315 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
316 break;
317 }
318
319 if (i == num_waves)
320 return; /* the shader is not being executed */
321
322 /* Remember the first found wave. The waves are sorted according to PC. */
323 waves = &waves[i];
324 num_waves -= i;
325
326 /* Get the list of instructions.
327 * Buffer size / 4 is the upper bound of the instruction count.
328 */
329 unsigned num_inst = 0;
330 struct radv_shader_inst *instructions =
331 calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
332
333 si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
334
335 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
336 radv_get_shader_name(&shader->info, stage));
337
338 /* Print instructions with annotations. */
339 for (i = 0; i < num_inst; i++) {
340 struct radv_shader_inst *inst = &instructions[i];
341
342 fprintf(f, "%s\n", inst->text);
343
344 /* Print which waves execute the instruction right now. */
345 while (num_waves && start_addr + inst->offset == waves->pc) {
346 fprintf(f,
347 " " COLOR_GREEN "^ SE%u SH%u CU%u "
348 "SIMD%u WAVE%u EXEC=%016" PRIx64 " ",
349 waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
350
351 if (inst->size == 4) {
352 fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
353 } else {
354 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
355 }
356
357 waves->matched = true;
358 waves = &waves[1];
359 num_waves--;
360 }
361 }
362
363 fprintf(f, "\n\n");
364 free(instructions);
365 }
366
367 static void
radv_dump_annotated_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,FILE * f)368 radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
369 FILE *f)
370 {
371 struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
372 enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
373 unsigned num_waves = ac_get_wave_info(chip_class, waves);
374
375 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
376
377 /* Dump annotated active graphics shaders. */
378 unsigned stages = active_stages;
379 while (stages) {
380 int stage = u_bit_scan(&stages);
381
382 radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f);
383 }
384
385 /* Print waves executing shaders that are not currently bound. */
386 unsigned i;
387 bool found = false;
388 for (i = 0; i < num_waves; i++) {
389 if (waves[i].matched)
390 continue;
391
392 if (!found) {
393 fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
394 found = true;
395 }
396 fprintf(f,
397 " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64
398 "\n",
399 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
400 waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
401 }
402 if (found)
403 fprintf(f, "\n\n");
404 }
405
406 static void
radv_dump_spirv(struct radv_shader * shader,const char * sha1,const char * dump_dir)407 radv_dump_spirv(struct radv_shader *shader, const char *sha1, const char *dump_dir)
408 {
409 char dump_path[512];
410 FILE *f;
411
412 snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);
413
414 f = fopen(dump_path, "w+");
415 if (f) {
416 fwrite(shader->spirv, shader->spirv_size, 1, f);
417 fclose(f);
418 }
419 }
420
421 static void
radv_dump_shader(struct radv_pipeline * pipeline,struct radv_shader * shader,gl_shader_stage stage,const char * dump_dir,FILE * f)422 radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader *shader,
423 gl_shader_stage stage, const char *dump_dir, FILE *f)
424 {
425 if (!shader)
426 return;
427
428 fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
429
430 if (shader->spirv) {
431 unsigned char sha1[21];
432 char sha1buf[41];
433
434 _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
435 _mesa_sha1_format(sha1buf, sha1);
436
437 fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
438 radv_dump_spirv(shader, sha1buf, dump_dir);
439 }
440
441 if (shader->nir_string) {
442 fprintf(f, "NIR:\n%s\n", shader->nir_string);
443 }
444
445 fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
446 shader->ir_string);
447 fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
448
449 radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
450 }
451
452 static void
radv_dump_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,const char * dump_dir,FILE * f)453 radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
454 const char *dump_dir, FILE *f)
455 {
456 /* Dump active graphics shaders. */
457 unsigned stages = active_stages;
458 while (stages) {
459 int stage = u_bit_scan(&stages);
460
461 radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f);
462 }
463 }
464
465 static void
radv_dump_vertex_descriptors(struct radv_pipeline * pipeline,FILE * f)466 radv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f)
467 {
468 void *ptr = (uint64_t *)pipeline->device->trace_id_ptr;
469 uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask);
470 uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
471
472 if (!count)
473 return;
474
475 fprintf(f, "Num vertex %s: %d\n",
476 pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count);
477 for (uint32_t i = 0; i < count; i++) {
478 uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
479 uint64_t va = 0;
480
481 va |= desc[0];
482 va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
483
484 fprintf(f, "VBO#%d:\n", i);
485 fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
486 fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
487 fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
488 }
489 }
490
491 static struct radv_shader_prolog *
radv_get_saved_vs_prolog(struct radv_device * device)492 radv_get_saved_vs_prolog(struct radv_device *device)
493 {
494 uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
495 return *(struct radv_shader_prolog **)(ptr + 4);
496 }
497
498 static void
radv_dump_vs_prolog(struct radv_pipeline * pipeline,FILE * f)499 radv_dump_vs_prolog(struct radv_pipeline *pipeline, FILE *f)
500 {
501 struct radv_shader_prolog *vs_prolog = radv_get_saved_vs_prolog(pipeline->device);
502 struct radv_shader *vs_shader = radv_get_shader(pipeline, MESA_SHADER_VERTEX);
503
504 if (!vs_prolog || !vs_shader || !vs_shader->info.vs.has_prolog)
505 return;
506
507 fprintf(f, "Vertex prolog:\n\n");
508 fprintf(f, "DISASM:\n%s\n", vs_prolog->disasm_string);
509 }
510
511 static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device * device,enum ring_type ring)512 radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
513 {
514 uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
515 int offset = ring == RING_GFX ? 1 : 2;
516
517 return *(struct radv_pipeline **)(ptr + offset);
518 }
519
520 static void
radv_dump_queue_state(struct radv_queue * queue,const char * dump_dir,FILE * f)521 radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
522 {
523 enum ring_type ring = radv_queue_ring(queue);
524 struct radv_pipeline *pipeline;
525
526 fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
527
528 pipeline = radv_get_saved_pipeline(queue->device, ring);
529 if (pipeline) {
530 radv_dump_vs_prolog(pipeline, f);
531 radv_dump_shaders(pipeline, pipeline->active_stages, dump_dir, f);
532 if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
533 radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
534 radv_dump_vertex_descriptors(pipeline, f);
535 radv_dump_descriptors(queue->device, f);
536 }
537 }
538
/**
 * Run a shell command with popen() and copy its standard output to \p f,
 * followed by an empty line. Does nothing on Windows or when the command
 * cannot be started.
 */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
   char chunk[2048];
   FILE *pipe_stream = popen(cmd, "r");

   if (pipe_stream == NULL)
      return;

   while (fgets(chunk, sizeof(chunk), pipe_stream) != NULL)
      fputs(chunk, f);

   fprintf(f, "\n");
   pclose(pipe_stream);
#endif
}
555
/**
 * Write a header followed by the output of "dmesg | tail -n60" to \p f.
 */
static void
radv_dump_dmesg(FILE *f)
{
   static const char dmesg_cmd[] = "dmesg | tail -n60";

   fputs("\nLast 60 lines of dmesg:\n\n", f);
   radv_dump_cmd(dmesg_cmd, f);
}
562
563 void
radv_dump_enabled_options(struct radv_device * device,FILE * f)564 radv_dump_enabled_options(struct radv_device *device, FILE *f)
565 {
566 uint64_t mask;
567
568 if (device->instance->debug_flags) {
569 fprintf(f, "Enabled debug options: ");
570
571 mask = device->instance->debug_flags;
572 while (mask) {
573 int i = u_bit_scan64(&mask);
574 fprintf(f, "%s, ", radv_get_debug_option_name(i));
575 }
576 fprintf(f, "\n");
577 }
578
579 if (device->instance->perftest_flags) {
580 fprintf(f, "Enabled perftest options: ");
581
582 mask = device->instance->perftest_flags;
583 while (mask) {
584 int i = u_bit_scan64(&mask);
585 fprintf(f, "%s, ", radv_get_perftest_option_name(i));
586 }
587 fprintf(f, "\n");
588 }
589 }
590
591 static void
radv_dump_app_info(struct radv_device * device,FILE * f)592 radv_dump_app_info(struct radv_device *device, FILE *f)
593 {
594 struct radv_instance *instance = device->instance;
595
596 fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
597 fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
598 fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
599 fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
600 fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
601 VK_VERSION_MINOR(instance->vk.app_info.api_version),
602 VK_VERSION_PATCH(instance->vk.app_info.api_version));
603
604 radv_dump_enabled_options(device, f);
605 }
606
607 static void
radv_dump_device_name(struct radv_device * device,FILE * f)608 radv_dump_device_name(struct radv_device *device, FILE *f)
609 {
610 struct radeon_info *info = &device->physical_device->rad_info;
611 #ifndef _WIN32
612 char kernel_version[128] = {0};
613 struct utsname uname_data;
614 #endif
615 const char *chip_name;
616
617 chip_name = device->ws->get_chip_name(device->ws);
618
619 #ifdef _WIN32
620 fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name,
621 info->drm_major, info->drm_minor, info->drm_patchlevel);
622 #else
623 if (uname(&uname_data) == 0)
624 snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
625
626 fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name,
627 info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);
628 #endif
629 }
630
631 static void
radv_dump_umr_ring(struct radv_queue * queue,FILE * f)632 radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
633 {
634 enum ring_type ring = radv_queue_ring(queue);
635 struct radv_device *device = queue->device;
636 char cmd[128];
637
638 /* TODO: Dump compute ring. */
639 if (ring != RING_GFX)
640 return;
641
642 sprintf(cmd, "umr -R %s 2>&1",
643 device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
644
645 fprintf(f, "\nUMR GFX ring:\n\n");
646 radv_dump_cmd(cmd, f);
647 }
648
649 static void
radv_dump_umr_waves(struct radv_queue * queue,FILE * f)650 radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
651 {
652 enum ring_type ring = radv_queue_ring(queue);
653 struct radv_device *device = queue->device;
654 char cmd[128];
655
656 /* TODO: Dump compute ring. */
657 if (ring != RING_GFX)
658 return;
659
660 sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
661 device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
662
663 fprintf(f, "\nUMR GFX waves:\n\n");
664 radv_dump_cmd(cmd, f);
665 }
666
667 static bool
radv_gpu_hang_occured(struct radv_queue * queue,enum ring_type ring)668 radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
669 {
670 struct radeon_winsys *ws = queue->device->ws;
671
672 if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
673 return true;
674
675 return false;
676 }
677
678 void
radv_check_gpu_hangs(struct radv_queue * queue,struct radeon_cmdbuf * cs)679 radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
680 {
681 struct radv_device *device = queue->device;
682 enum ring_type ring;
683 uint64_t addr;
684
685 ring = radv_queue_ring(queue);
686
687 bool hang_occurred = radv_gpu_hang_occured(queue, ring);
688 bool vm_fault_occurred = false;
689 if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
690 vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
691 &device->dmesg_timestamp, &addr);
692 if (!hang_occurred && !vm_fault_occurred)
693 return;
694
695 fprintf(stderr, "radv: GPU hang detected...\n");
696
697 #ifndef _WIN32
698 /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
699 * various debugging info about that GPU hang.
700 */
701 struct tm *timep, result;
702 time_t raw_time;
703 FILE *f;
704 char dump_dir[256], dump_path[512], buf_time[128];
705
706 time(&raw_time);
707 timep = os_localtime(&raw_time, &result);
708 strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
709
710 snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),
711 getpid(), buf_time);
712 if (mkdir(dump_dir, 0774) && errno != EEXIST) {
713 fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
714 abort();
715 }
716
717 fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
718
719 /* Dump trace file. */
720 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
721 f = fopen(dump_path, "w+");
722 if (f) {
723 radv_dump_trace(queue->device, cs, f);
724 fclose(f);
725 }
726
727 /* Dump pipeline state. */
728 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
729 f = fopen(dump_path, "w+");
730 if (f) {
731 radv_dump_queue_state(queue, dump_dir, f);
732 fclose(f);
733 }
734
735 if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
736 /* Dump UMR ring. */
737 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
738 f = fopen(dump_path, "w+");
739 if (f) {
740 radv_dump_umr_ring(queue, f);
741 fclose(f);
742 }
743
744 /* Dump UMR waves. */
745 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
746 f = fopen(dump_path, "w+");
747 if (f) {
748 radv_dump_umr_waves(queue, f);
749 fclose(f);
750 }
751 }
752
753 /* Dump debug registers. */
754 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
755 f = fopen(dump_path, "w+");
756 if (f) {
757 radv_dump_debug_registers(device, f);
758 fclose(f);
759 }
760
761 /* Dump BO ranges. */
762 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
763 f = fopen(dump_path, "w+");
764 if (f) {
765 device->ws->dump_bo_ranges(device->ws, f);
766 fclose(f);
767 }
768
769 /* Dump BO log. */
770 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
771 f = fopen(dump_path, "w+");
772 if (f) {
773 device->ws->dump_bo_log(device->ws, f);
774 fclose(f);
775 }
776
777 /* Dump VM fault info. */
778 if (vm_fault_occurred) {
779 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
780 f = fopen(dump_path, "w+");
781 if (f) {
782 fprintf(f, "VM fault report.\n\n");
783 fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);
784 fclose(f);
785 }
786 }
787
788 /* Dump app info. */
789 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
790 f = fopen(dump_path, "w+");
791 if (f) {
792 radv_dump_app_info(device, f);
793 fclose(f);
794 }
795
796 /* Dump GPU info. */
797 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
798 f = fopen(dump_path, "w+");
799 if (f) {
800 radv_dump_device_name(device, f);
801 ac_print_gpu_info(&device->physical_device->rad_info, f);
802 fclose(f);
803 }
804
805 /* Dump dmesg. */
806 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
807 f = fopen(dump_path, "w+");
808 if (f) {
809 radv_dump_dmesg(f);
810 fclose(f);
811 }
812 #endif
813
814 fprintf(stderr, "radv: GPU hang report saved successfully!\n");
815 abort();
816 }
817
/**
 * Disassemble a SPIR-V binary with the external "spirv-dis" tool and copy
 * the tool's output to \p fp.
 *
 * The binary is written to a temporary file created with mkstemp(), which is
 * removed again afterwards. Does nothing on Windows or when the temporary
 * file cannot be created.
 */
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
#ifndef _WIN32
   char tmp_path[] = "/tmp/fileXXXXXX";
   char dis_cmd[128];

   /* Dump the binary into a temporary file. */
   const int tmp_fd = mkstemp(tmp_path);
   if (tmp_fd < 0)
      return;

   if (write(tmp_fd, data, size) != -1) {
      /* Disassemble using spirv-dis if installed. */
      snprintf(dis_cmd, sizeof(dis_cmd), "spirv-dis %s", tmp_path);
      radv_dump_cmd(dis_cmd, fp);
   }

   close(tmp_fd);
   unlink(tmp_path);
#endif
}
843
844 bool
radv_trap_handler_init(struct radv_device * device)845 radv_trap_handler_init(struct radv_device *device)
846 {
847 struct radeon_winsys *ws = device->ws;
848 VkResult result;
849
850 /* Create the trap handler shader and upload it like other shaders. */
851 device->trap_handler_shader = radv_create_trap_handler_shader(device);
852 if (!device->trap_handler_shader) {
853 fprintf(stderr, "radv: failed to create the trap handler shader.\n");
854 return false;
855 }
856
857 result = ws->buffer_make_resident(ws, device->trap_handler_shader->alloc->arena->bo, true);
858 if (result != VK_SUCCESS)
859 return false;
860
861 result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
862 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
863 RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
864 RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
865 if (result != VK_SUCCESS)
866 return false;
867
868 result = ws->buffer_make_resident(ws, device->tma_bo, true);
869 if (result != VK_SUCCESS)
870 return false;
871
872 device->tma_ptr = ws->buffer_map(device->tma_bo);
873 if (!device->tma_ptr)
874 return false;
875
876 /* Upload a buffer descriptor to store various info from the trap. */
877 uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
878 uint32_t desc[4];
879
880 desc[0] = tma_va;
881 desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
882 desc[2] = TMA_BO_SIZE;
883 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
884 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
885 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
886
887 memcpy(device->tma_ptr, desc, sizeof(desc));
888
889 return true;
890 }
891
892 void
radv_trap_handler_finish(struct radv_device * device)893 radv_trap_handler_finish(struct radv_device *device)
894 {
895 struct radeon_winsys *ws = device->ws;
896
897 if (unlikely(device->trap_handler_shader)) {
898 ws->buffer_make_resident(ws, device->trap_handler_shader->alloc->arena->bo, false);
899 radv_trap_handler_shader_destroy(device, device->trap_handler_shader);
900 }
901
902 if (unlikely(device->tma_bo)) {
903 ws->buffer_make_resident(ws, device->tma_bo, false);
904 ws->buffer_destroy(ws, device->tma_bo);
905 }
906 }
907
908 static void
radv_dump_faulty_shader(struct radv_device * device,uint64_t faulty_pc)909 radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
910 {
911 struct radv_shader *shader;
912 uint64_t start_addr, end_addr;
913 uint32_t instr_offset;
914
915 shader = radv_find_shader(device, faulty_pc);
916 if (!shader)
917 return;
918
919 start_addr = radv_shader_get_va(shader);
920 end_addr = start_addr + shader->code_size;
921 instr_offset = faulty_pc - start_addr;
922
923 fprintf(stderr,
924 "Faulty shader found "
925 "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
926 start_addr, end_addr, instr_offset);
927
928 /* Get the list of instructions.
929 * Buffer size / 4 is the upper bound of the instruction count.
930 */
931 unsigned num_inst = 0;
932 struct radv_shader_inst *instructions =
933 calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
934
935 /* Split the disassembly string into instructions. */
936 si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
937
938 /* Print instructions with annotations. */
939 for (unsigned i = 0; i < num_inst; i++) {
940 struct radv_shader_inst *inst = &instructions[i];
941
942 if (start_addr + inst->offset == faulty_pc) {
943 fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
944 fprintf(stderr, "%s\n", inst->text);
945 fprintf(stderr, "\n");
946 } else {
947 fprintf(stderr, "%s\n", inst->text);
948 }
949 }
950
951 free(instructions);
952 }
953
/* Register values read back from the TMA buffer: radv_dump_sq_hw_regs()
 * overlays this struct on device->tma_ptr[6], so the field order and sizes
 * must match what is stored there.
 */
struct radv_sq_hw_reg {
   uint32_t status;   /* dumped as SQ_WAVE_STATUS */
   uint32_t trap_sts; /* dumped as SQ_WAVE_TRAPSTS */
   uint32_t hw_id;    /* dumped as SQ_WAVE_HW_ID (SQ_WAVE_HW_ID1 on GFX10+) */
   uint32_t ib_sts;   /* dumped as SQ_WAVE_IB_STS */
};
960
961 static void
radv_dump_sq_hw_regs(struct radv_device * device)962 radv_dump_sq_hw_regs(struct radv_device *device)
963 {
964 struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
965
966 fprintf(stderr, "\nHardware registers:\n");
967 if (device->physical_device->rad_info.chip_class >= GFX10) {
968 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS,
969 regs->status, ~0);
970 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS,
971 regs->trap_sts, ~0);
972 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1,
973 regs->hw_id, ~0);
974 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS,
975 regs->ib_sts, ~0);
976 } else {
977 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS,
978 regs->status, ~0);
979 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS,
980 regs->trap_sts, ~0);
981 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID,
982 regs->hw_id, ~0);
983 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS,
984 regs->ib_sts, ~0);
985 }
986 fprintf(stderr, "\n\n");
987 }
988
989 void
radv_check_trap_handler(struct radv_queue * queue)990 radv_check_trap_handler(struct radv_queue *queue)
991 {
992 enum ring_type ring = radv_queue_ring(queue);
993 struct radv_device *device = queue->device;
994 struct radeon_winsys *ws = device->ws;
995
996 /* Wait for the context to be idle in a finite time. */
997 ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);
998
999 /* Try to detect if the trap handler has been reached by the hw by
1000 * looking at ttmp0 which should be non-zero if a shader exception
1001 * happened.
1002 */
1003 if (!device->tma_ptr[4])
1004 return;
1005
1006 #if 0
1007 fprintf(stderr, "tma_ptr:\n");
1008 for (unsigned i = 0; i < 10; i++)
1009 fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
1010 #endif
1011
1012 radv_dump_sq_hw_regs(device);
1013
1014 uint32_t ttmp0 = device->tma_ptr[4];
1015 uint32_t ttmp1 = device->tma_ptr[5];
1016
1017 /* According to the ISA docs, 3.10 Trap and Exception Registers:
1018 *
1019 * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
1020 *
1021 * "When the trap handler is entered, the PC of the faulting
1022 * instruction is: (PC - PC_rewind * 4)."
1023 * */
1024 uint8_t trap_id = (ttmp1 >> 16) & 0xff;
1025 uint8_t ht = (ttmp1 >> 24) & 0x1;
1026 uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
1027 uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
1028
1029 fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,
1030 pc_rewind);
1031
1032 radv_dump_faulty_shader(device, pc);
1033
1034 abort();
1035 }
1036