1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "common/intel_decoder.h"
25 #include "intel_disasm.h"
26 #include "util/macros.h"
27 #include "main/macros.h" /* Needed for ROUND_DOWN_TO */
28 
29 #include <string.h>
30 
31 void
32 intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
33                             const struct intel_device_info *devinfo,
34                             FILE *fp, enum intel_batch_decode_flags flags,
35                             const char *xml_path,
36                             struct intel_batch_decode_bo (*get_bo)(void *,
37                                                                    bool,
38                                                                    uint64_t),
39                             unsigned (*get_state_size)(void *, uint64_t,
40                                                        uint64_t),
41                             void *user_data)
42 {
43    memset(ctx, 0, sizeof(*ctx));
44 
45    ctx->devinfo = *devinfo;
46    ctx->get_bo = get_bo;
47    ctx->get_state_size = get_state_size;
48    ctx->user_data = user_data;
49    ctx->fp = fp;
50    ctx->flags = flags;
51    ctx->max_vbo_decoded_lines = -1; /* No limit! */
52    ctx->engine = I915_ENGINE_CLASS_RENDER;
53 
54    if (xml_path == NULL)
55       ctx->spec = intel_spec_load(devinfo);
56    else
57       ctx->spec = intel_spec_load_from_path(devinfo, xml_path);
58 }
59 
60 void
61 intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
62 {
63    intel_spec_destroy(ctx->spec);
64 }
65 
66 #define CSI "\e["
67 #define RED_COLOR    CSI "31m"
68 #define BLUE_HEADER  CSI "0;44m" CSI "1;37m"
69 #define GREEN_HEADER CSI "1;42m"
70 #define NORMAL       CSI "0m"
71 
72 static void
73 ctx_print_group(struct intel_batch_decode_ctx *ctx,
74                 struct intel_group *group,
75                 uint64_t address, const void *map)
76 {
77    intel_print_group(ctx->fp, group, address, map, 0,
78                    (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0);
79 }
80 
81 static struct intel_batch_decode_bo
82 ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)
83 {
84    if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) {
85       /* On Broadwell and above, we have 48-bit addresses which consume two
86        * dwords.  Some packets require that these get stored in a "canonical
87        * form" which means that bit 47 is sign-extended through the upper
88        * bits. In order to correctly handle those aub dumps, we need to mask
89        * off the top 16 bits.
90        */
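      /* Example: a canonical 48-bit address such as 0xffff800000001000 becomes
       * 0x0000800000001000 after masking with (~0ull >> 16).
       */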
91       addr &= (~0ull >> 16);
92    }
93 
94    struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr);
95 
96    if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0))
97       bo.addr &= (~0ull >> 16);
98 
99    /* We may actually have an offset into the bo */
100    if (bo.map != NULL) {
101       assert(bo.addr <= addr);
102       uint64_t offset = addr - bo.addr;
103       bo.map += offset;
104       bo.addr += offset;
105       bo.size -= offset;
106    }
107 
108    return bo;
109 }
110 
111 static int
112 update_count(struct intel_batch_decode_ctx *ctx,
113              uint64_t address,
114              uint64_t base_address,
115              unsigned element_dwords,
116              unsigned guess)
117 {
118    unsigned size = 0;
119 
120    if (ctx->get_state_size)
121       size = ctx->get_state_size(ctx->user_data, address, base_address);
122 
123    if (size > 0)
124       return size / (sizeof(uint32_t) * element_dwords);
125 
126    /* In the absence of any information, just guess arbitrarily. */
127    return guess;
128 }
129 
130 static void
131 ctx_disassemble_program(struct intel_batch_decode_ctx *ctx,
132                         uint32_t ksp, const char *type)
133 {
134    uint64_t addr = ctx->instruction_base + ksp;
135    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
136    if (!bo.map)
137       return;
138 
139    fprintf(ctx->fp, "\nReferenced %s:\n", type);
140    intel_disassemble(&ctx->devinfo, bo.map, 0, ctx->fp);
141 }
142 
143 /* Heuristic to determine whether a uint32_t is probably actually a float
144  * (http://stackoverflow.com/a/2953466)
145  */
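/* For example, 0x3f800000 (1.0f) has an unbiased exponent of 0 and is printed
 * as a float, while a small integer such as 0x00000005 has exponent -127 with
 * a nonzero mantissa and falls through to the hex path.
 */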
146 
147 static bool
148 probably_float(uint32_t bits)
149 {
150    int exp = ((bits & 0x7f800000U) >> 23) - 127;
151    uint32_t mant = bits & 0x007fffff;
152 
153    /* +- 0.0 */
154    if (exp == -127 && mant == 0)
155       return true;
156 
157    /* +- 1 billionth to 1 billion */
158    if (-30 <= exp && exp <= 30)
159       return true;
160 
161    /* some value with only a few binary digits */
162    if ((mant & 0x0000ffff) == 0)
163       return true;
164 
165    return false;
166 }
167 
168 static void
169 ctx_print_buffer(struct intel_batch_decode_ctx *ctx,
170                  struct intel_batch_decode_bo bo,
171                  uint32_t read_length,
172                  uint32_t pitch,
173                  int max_lines)
174 {
175    const uint32_t *dw_end =
176          bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4);
177 
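   /* Start a new output line after a full pitch worth of dwords (so rows line
    * up with the buffer's element stride) or after 8 dwords, whichever comes
    * first.
    */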
178    int column_count = 0, pitch_col_count = 0, line_count = -1;
179    for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
180       if (pitch_col_count * 4 == pitch || column_count == 8) {
181          fprintf(ctx->fp, "\n");
182          column_count = 0;
183          if (pitch_col_count * 4 == pitch)
184             pitch_col_count = 0;
185          line_count++;
186 
187          if (max_lines >= 0 && line_count >= max_lines)
188             break;
189       }
190       fprintf(ctx->fp, column_count == 0 ? "  " : " ");
191 
192       if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw))
193          fprintf(ctx->fp, "  %8.2f", *(float *) dw);
194       else
195          fprintf(ctx->fp, "  0x%08x", *dw);
196 
197       column_count++;
198       pitch_col_count++;
199    }
200    fprintf(ctx->fp, "\n");
201 }
202 
203 static struct intel_group *
204 intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
205 {
206    return intel_spec_find_instruction(ctx->spec, ctx->engine, p);
207 }
208 
209 static void
210 handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
211 {
212    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
213 
214    struct intel_field_iterator iter;
215    intel_field_iterator_init(&iter, inst, p, 0, false);
216 
217    uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0;
218    bool surface_modify = false, dynamic_modify = false, instruction_modify = false;
219 
220    while (intel_field_iterator_next(&iter)) {
221       if (strcmp(iter.name, "Surface State Base Address") == 0) {
222          surface_base = iter.raw_value;
223       } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
224          dynamic_base = iter.raw_value;
225       } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
226          instruction_base = iter.raw_value;
227       } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) {
228          surface_modify = iter.raw_value;
229       } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
230          dynamic_modify = iter.raw_value;
231       } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
232          instruction_modify = iter.raw_value;
233       }
234    }
235 
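   /* Only latch a base address when its corresponding Modify Enable bit is
    * set, mirroring how the hardware applies STATE_BASE_ADDRESS.
    */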
236    if (dynamic_modify)
237       ctx->dynamic_base = dynamic_base;
238 
239    if (surface_modify)
240       ctx->surface_base = surface_base;
241 
242    if (instruction_modify)
243       ctx->instruction_base = instruction_base;
244 }
245 
246 static void
247 handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx,
248                                 const uint32_t *p)
249 {
250    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
251 
252    struct intel_field_iterator iter;
253    intel_field_iterator_init(&iter, inst, p, 0, false);
254 
255    uint64_t bt_pool_base = 0;
256    bool bt_pool_enable = false;
257 
258    while (intel_field_iterator_next(&iter)) {
259       if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) {
260          bt_pool_base = iter.raw_value;
261       } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) {
262          bt_pool_enable = iter.raw_value;
263       }
264    }
265 
266    if (bt_pool_enable) {
267       ctx->bt_pool_base = bt_pool_base;
268    } else {
269       ctx->bt_pool_base = 0;
270    }
271 }
272 
273 static void
274 dump_binding_table(struct intel_batch_decode_ctx *ctx,
275                    uint32_t offset, int count)
276 {
277    struct intel_group *strct =
278       intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
279    if (strct == NULL) {
280       fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
281       return;
282    }
283 
284    /* When 256B binding tables are enabled, we have to shift the offset */
285    if (ctx->use_256B_binding_tables)
286       offset <<= 3;
287 
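   /* Binding table entries are relative to the binding table pool base when
    * one has been allocated, and to Surface State Base Address otherwise.
    */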
288    const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base :
289                                                      ctx->surface_base;
290 
291    if (count < 0) {
292       count = update_count(ctx, bt_pool_base + offset,
293                            bt_pool_base, 1, 8);
294    }
295 
296    if (offset % 32 != 0 || offset >= UINT16_MAX) {
297       fprintf(ctx->fp, "  invalid binding table pointer\n");
298       return;
299    }
300 
301    struct intel_batch_decode_bo bind_bo =
302       ctx_get_bo(ctx, true, bt_pool_base + offset);
303 
304    if (bind_bo.map == NULL) {
305       fprintf(ctx->fp, "  binding table unavailable\n");
306       return;
307    }
308 
309    const uint32_t *pointers = bind_bo.map;
310    for (int i = 0; i < count; i++) {
311       if (pointers[i] == 0)
312          continue;
313 
314       uint64_t addr = ctx->surface_base + pointers[i];
315       struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
316       uint32_t size = strct->dw_length * 4;
317 
318       if (pointers[i] % 32 != 0 ||
319           addr < bo.addr || addr + size >= bo.addr + bo.size) {
320          fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]);
321          continue;
322       }
323 
324       fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]);
325       ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
326    }
327 }
328 
329 static void
330 dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)
331 {
332    struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE");
333    uint64_t state_addr = ctx->dynamic_base + offset;
334 
335    assert(count > 0);
336 
337    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
338    const void *state_map = bo.map;
339 
340    if (state_map == NULL) {
341       fprintf(ctx->fp, "  samplers unavailable\n");
342       return;
343    }
344 
345    if (offset % 32 != 0) {
346       fprintf(ctx->fp, "  invalid sampler state pointer\n");
347       return;
348    }
349 
350    const unsigned sampler_state_size = strct->dw_length * 4;
351 
352    if (count * sampler_state_size >= bo.size) {
353       fprintf(ctx->fp, "  sampler state ends after bo ends\n");
354       assert(!"sampler state ends after bo ends");
355       return;
356    }
357 
358    for (int i = 0; i < count; i++) {
359       fprintf(ctx->fp, "sampler state %d\n", i);
360       ctx_print_group(ctx, strct, state_addr, state_map);
361       state_addr += sampler_state_size;
362       state_map += sampler_state_size;
363    }
364 }
365 
366 static void
367 handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx,
368                                  struct intel_group *desc, const uint32_t *p)
369 {
370    uint64_t ksp = 0;
371    uint32_t sampler_offset = 0, sampler_count = 0;
372    uint32_t binding_table_offset = 0, binding_entry_count = 0;
373 
374    struct intel_field_iterator iter;
375    intel_field_iterator_init(&iter, desc, p, 0, false);
376    while (intel_field_iterator_next(&iter)) {
377       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
378          ksp = strtoll(iter.value, NULL, 16);
379       } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
380          sampler_offset = strtol(iter.value, NULL, 16);
381       } else if (strcmp(iter.name, "Sampler Count") == 0) {
382          sampler_count = strtol(iter.value, NULL, 10);
383       } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
384          binding_table_offset = strtol(iter.value, NULL, 16);
385       } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
386          binding_entry_count = strtol(iter.value, NULL, 10);
387       }
388    }
389 
390    ctx_disassemble_program(ctx, ksp, "compute shader");
391    fprintf(ctx->fp, "\n");
392 
393    if (sampler_count)
394       dump_samplers(ctx, sampler_offset, sampler_count);
395    if (binding_entry_count)
396       dump_binding_table(ctx, binding_table_offset, binding_entry_count);
397 }
398 
399 static void
400 handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx,
401                                        const uint32_t *p)
402 {
403    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
404    struct intel_group *desc =
405       intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
406 
407    struct intel_field_iterator iter;
408    intel_field_iterator_init(&iter, inst, p, 0, false);
409    uint32_t descriptor_offset = 0;
410    int descriptor_count = 0;
411    while (intel_field_iterator_next(&iter)) {
412       if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
413          descriptor_offset = strtol(iter.value, NULL, 16);
414       } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
415          descriptor_count =
416             strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
417       }
418    }
419 
420    uint64_t desc_addr = ctx->dynamic_base + descriptor_offset;
421    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr);
422    const void *desc_map = bo.map;
423 
424    if (desc_map == NULL) {
425       fprintf(ctx->fp, "  interface descriptors unavailable\n");
426       return;
427    }
428 
429    for (int i = 0; i < descriptor_count; i++) {
430       fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
431 
432       ctx_print_group(ctx, desc, desc_addr, desc_map);
433 
434       handle_interface_descriptor_data(ctx, desc, desc_map);
435 
436       desc_map += desc->dw_length * 4;
437       desc_addr += desc->dw_length * 4;
438    }
439 }
440 
441 static void
442 handle_compute_walker(struct intel_batch_decode_ctx *ctx,
443                       const uint32_t *p)
444 {
445    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
446 
447    struct intel_field_iterator iter;
448    intel_field_iterator_init(&iter, inst, p, 0, false);
449    while (intel_field_iterator_next(&iter)) {
450       if (strcmp(iter.name, "Interface Descriptor") == 0) {
451          handle_interface_descriptor_data(ctx, iter.struct_desc,
452                                           &iter.p[iter.start_bit / 32]);
453       }
454    }
455 }
456 
457 static void
458 handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx,
459                               const uint32_t *p)
460 {
461    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
462    struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");
463 
464    struct intel_batch_decode_bo vb = {};
465    uint32_t vb_size = 0;
466    int index = -1;
467    int pitch = -1;
468    bool ready = false;
469 
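   /* Each VERTEX_BUFFER_STATE entry is terminated by either a "Buffer Size"
    * field or an "End Address" field, depending on the generation; once one
    * of them is seen the buffer is dumped and the per-entry state is reset.
    */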
470    struct intel_field_iterator iter;
471    intel_field_iterator_init(&iter, inst, p, 0, false);
472    while (intel_field_iterator_next(&iter)) {
473       if (iter.struct_desc != vbs)
474          continue;
475 
476       struct intel_field_iterator vbs_iter;
477       intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false);
478       while (intel_field_iterator_next(&vbs_iter)) {
479          if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
480             index = vbs_iter.raw_value;
481          } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
482             pitch = vbs_iter.raw_value;
483          } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
484             vb = ctx_get_bo(ctx, true, vbs_iter.raw_value);
485          } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
486             vb_size = vbs_iter.raw_value;
487             ready = true;
488          } else if (strcmp(vbs_iter.name, "End Address") == 0) {
489             if (vb.map && vbs_iter.raw_value >= vb.addr)
490                vb_size = (vbs_iter.raw_value + 1) - vb.addr;
491             else
492                vb_size = 0;
493             ready = true;
494          }
495 
496          if (!ready)
497             continue;
498 
499          fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);
500 
501          if (vb.map == NULL) {
502             fprintf(ctx->fp, "  buffer contents unavailable\n");
503             continue;
504          }
505 
506          if (vb.map == 0 || vb_size == 0)
507             continue;
508 
509          ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines);
510 
511          vb.map = NULL;
512          vb_size = 0;
513          index = -1;
514          pitch = -1;
515          ready = false;
516       }
517    }
518 }
519 
520 static void
521 handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx,
522                             const uint32_t *p)
523 {
524    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
525 
526    struct intel_batch_decode_bo ib = {};
527    uint32_t ib_size = 0;
528    uint32_t format = 0;
529 
530    struct intel_field_iterator iter;
531    intel_field_iterator_init(&iter, inst, p, 0, false);
532    while (intel_field_iterator_next(&iter)) {
533       if (strcmp(iter.name, "Index Format") == 0) {
534          format = iter.raw_value;
535       } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
536          ib = ctx_get_bo(ctx, true, iter.raw_value);
537       } else if (strcmp(iter.name, "Buffer Size") == 0) {
538          ib_size = iter.raw_value;
539       }
540    }
541 
542    if (ib.map == NULL) {
543       fprintf(ctx->fp, "  buffer contents unavailable\n");
544       return;
545    }
546 
547    const void *m = ib.map;
548    const void *ib_end = ib.map + MIN2(ib.size, ib_size);
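   /* Index Format selects the element size: 0 = 8-bit, 1 = 16-bit and
    * 2 = 32-bit indices.  Only the first few indices are printed.
    */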
549    for (int i = 0; m < ib_end && i < 10; i++) {
550       switch (format) {
551       case 0:
552          fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
553          m += 1;
554          break;
555       case 1:
556          fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
557          m += 2;
558          break;
559       case 2:
560          fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
561          m += 4;
562          break;
563       }
564    }
565 
566    if (m < ib_end)
567       fprintf(ctx->fp, "...");
568    fprintf(ctx->fp, "\n");
569 }
570 
571 static void
572 decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
573 {
574    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
575 
576    uint64_t ksp = 0;
577    bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
578    bool is_enabled = true;
579 
580    struct intel_field_iterator iter;
581    intel_field_iterator_init(&iter, inst, p, 0, false);
582    while (intel_field_iterator_next(&iter)) {
583       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
584          ksp = iter.raw_value;
585       } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
586          is_simd8 = iter.raw_value;
587       } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
588          is_simd8 = strcmp(iter.value, "SIMD8") == 0;
589       } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
590          is_simd8 = strcmp(iter.value, "SIMD8") == 0;
591       } else if (strcmp(iter.name, "Enable") == 0) {
592          is_enabled = iter.raw_value;
593       }
594    }
595 
596    const char *type =
597       strcmp(inst->name,   "VS_STATE") == 0 ? "vertex shader" :
598       strcmp(inst->name,   "GS_STATE") == 0 ? "geometry shader" :
599       strcmp(inst->name,   "SF_STATE") == 0 ? "strips and fans shader" :
600       strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
601       strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
602       strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" :
603       strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
604       strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
605       NULL;
606 
607    if (is_enabled) {
608       ctx_disassemble_program(ctx, ksp, type);
609       fprintf(ctx->fp, "\n");
610    }
611 }
612 
613 static void
614 decode_ps_kern(struct intel_batch_decode_ctx *ctx,
615                struct intel_group *inst, const uint32_t *p)
616 {
617    bool single_ksp = ctx->devinfo.ver == 4;
618    uint64_t ksp[3] = {0, 0, 0};
619    bool enabled[3] = {false, false, false};
620 
621    struct intel_field_iterator iter;
622    intel_field_iterator_init(&iter, inst, p, 0, false);
623    while (intel_field_iterator_next(&iter)) {
624       if (strncmp(iter.name, "Kernel Start Pointer ",
625                   strlen("Kernel Start Pointer ")) == 0) {
626          int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
627          ksp[idx] = strtol(iter.value, NULL, 16);
628       } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
629          enabled[0] = strcmp(iter.value, "true") == 0;
630       } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
631          enabled[1] = strcmp(iter.value, "true") == 0;
632       } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
633          enabled[2] = strcmp(iter.value, "true") == 0;
634       }
635    }
636 
637    if (single_ksp)
638       ksp[1] = ksp[2] = ksp[0];
639 
640    /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */
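   /* When only one dispatch mode is enabled its kernel lives in KSP[0];
    * otherwise the hardware places the SIMD32 kernel in KSP[1] and the
    * SIMD16 kernel in KSP[2], hence the swap below.
    */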
641    if (enabled[0] + enabled[1] + enabled[2] == 1) {
642       if (enabled[1]) {
643          ksp[1] = ksp[0];
644          ksp[0] = 0;
645       } else if (enabled[2]) {
646          ksp[2] = ksp[0];
647          ksp[0] = 0;
648       }
649    } else {
650       uint64_t tmp = ksp[1];
651       ksp[1] = ksp[2];
652       ksp[2] = tmp;
653    }
654 
655    if (enabled[0])
656       ctx_disassemble_program(ctx, ksp[0], "SIMD8 fragment shader");
657    if (enabled[1])
658       ctx_disassemble_program(ctx, ksp[1], "SIMD16 fragment shader");
659    if (enabled[2])
660       ctx_disassemble_program(ctx, ksp[2], "SIMD32 fragment shader");
661 
662    if (enabled[0] || enabled[1] || enabled[2])
663       fprintf(ctx->fp, "\n");
664 }
665 
666 static void
667 decode_ps_kernels(struct intel_batch_decode_ctx *ctx,
668                   const uint32_t *p)
669 {
670    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
671    decode_ps_kern(ctx, inst, p);
672 }
673 
674 static void
675 decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
676 {
677    struct intel_group *inst =
678       intel_spec_find_instruction(ctx->spec, ctx->engine, p);
679    struct intel_group *body =
680       intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA");
681 
682    uint32_t read_length[4];
683    struct intel_batch_decode_bo buffer[4];
684    memset(buffer, 0, sizeof(buffer));
685 
686    struct intel_field_iterator outer;
687    intel_field_iterator_init(&outer, inst, p, 0, false);
688    int idx = 0;
689    while (intel_field_iterator_next(&outer)) {
690       if (outer.struct_desc != body)
691          continue;
692 
693       struct intel_field_iterator iter;
694       intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
695                               0, false);
696       while (intel_field_iterator_next(&iter)) {
697          if (!strcmp(iter.name, "Pointer To Constant Buffer")) {
698             buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value);
699          } else if (!strcmp(iter.name, "Constant Buffer Read Length")) {
700             read_length[idx] = iter.raw_value;
701          }
702       }
703       idx++;
704    }
705 
706    for (int i = 0; i < 4; i++) {
707       if (read_length[i] == 0 || buffer[i].map == NULL)
708          continue;
709 
710       unsigned size = read_length[i] * 32;
711       fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
712 
713       ctx_print_buffer(ctx, buffer[i], size, 0, -1);
714    }
715 }
716 
717 static void
718 decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
719 {
720    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
721    struct intel_group *body =
722       intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
723 
724    uint32_t read_length[4] = {0};
725    uint64_t read_addr[4];
726 
727    struct intel_field_iterator outer;
728    intel_field_iterator_init(&outer, inst, p, 0, false);
729    while (intel_field_iterator_next(&outer)) {
730       if (outer.struct_desc != body)
731          continue;
732 
733       struct intel_field_iterator iter;
734       intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
735                               0, false);
736 
737       while (intel_field_iterator_next(&iter)) {
738          int idx;
739          if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) {
740             read_length[idx] = iter.raw_value;
741          } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) {
742             read_addr[idx] = iter.raw_value;
743          }
744       }
745 
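      /* Read lengths are given in 256-bit units, i.e. 32 bytes per unit. */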
746       for (int i = 0; i < 4; i++) {
747          if (read_length[i] == 0)
748             continue;
749 
750          struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]);
751          if (!buffer.map) {
752             fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
753             continue;
754          }
755 
756          unsigned size = read_length[i] * 32;
757          fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
758 
759          ctx_print_buffer(ctx, buffer, size, 0, -1);
760       }
761    }
762 }
763 
764 static void
765 decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
766 {
767    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
768    uint64_t read_length = 0, read_addr = 0, valid = 0;
769    struct intel_field_iterator iter;
770    intel_field_iterator_init(&iter, inst, p, 0, false);
771 
772    while (intel_field_iterator_next(&iter)) {
773       if (!strcmp(iter.name, "Buffer Length")) {
774          read_length = iter.raw_value;
775       } else if (!strcmp(iter.name, "Valid")) {
776          valid = iter.raw_value;
777       } else if (!strcmp(iter.name, "Buffer Starting Address")) {
778          read_addr = iter.raw_value;
779       }
780    }
781 
782    if (!valid)
783       return;
784 
785    struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr);
786    if (!buffer.map) {
787       fprintf(ctx->fp, "constant buffer unavailable\n");
788       return;
789    }
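   /* "Buffer Length" is encoded as the number of 512-bit (64-byte) units
    * minus one, hence the (read_length + 1) * 16 floats below.
    */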
790    unsigned size = (read_length + 1) * 16 * sizeof(float);
791    fprintf(ctx->fp, "constant buffer size %u\n", size);
792 
793    ctx_print_buffer(ctx, buffer, size, 0, -1);
794 }
795 
796 
797 static void
798 decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
799                                            const uint32_t *p)
800 {
801    fprintf(ctx->fp, "VS Binding Table:\n");
802    dump_binding_table(ctx, p[1], -1);
803 
804    fprintf(ctx->fp, "GS Binding Table:\n");
805    dump_binding_table(ctx, p[2], -1);
806 
807    if (ctx->devinfo.ver < 6) {
808       fprintf(ctx->fp, "CLIP Binding Table:\n");
809       dump_binding_table(ctx, p[3], -1);
810       fprintf(ctx->fp, "SF Binding Table:\n");
811       dump_binding_table(ctx, p[4], -1);
812       fprintf(ctx->fp, "PS Binding Table:\n");
813       dump_binding_table(ctx, p[5], -1);
814    } else {
815       fprintf(ctx->fp, "PS Binding Table:\n");
816       dump_binding_table(ctx, p[3], -1);
817    }
818 }
819 
820 static void
821 decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
822                                       const uint32_t *p)
823 {
824    dump_binding_table(ctx, p[1], -1);
825 }
826 
827 static void
828 decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx,
829                                       const uint32_t *p)
830 {
831    dump_samplers(ctx, p[1], 1);
832 }
833 
834 static void
835 decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx,
836                                            const uint32_t *p)
837 {
838    dump_samplers(ctx, p[1], 1);
839    dump_samplers(ctx, p[2], 1);
840    dump_samplers(ctx, p[3], 1);
841 }
842 
843 static bool
844 str_ends_with(const char *str, const char *end)
845 {
846    int offset = strlen(str) - strlen(end);
847    if (offset < 0)
848       return false;
849 
850    return strcmp(str + offset, end) == 0;
851 }
852 
853 static void
854 decode_dynamic_state(struct intel_batch_decode_ctx *ctx,
855                        const char *struct_type, uint32_t state_offset,
856                        int count)
857 {
858    uint64_t state_addr = ctx->dynamic_base + state_offset;
859    struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
860    const void *state_map = bo.map;
861 
862    if (state_map == NULL) {
863       fprintf(ctx->fp, "  dynamic %s state unavailable\n", struct_type);
864       return;
865    }
866 
867    struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type);
868    if (strcmp(struct_type, "BLEND_STATE") == 0) {
869       /* Blend states are different from the others because they have a header
870        * struct called BLEND_STATE which is followed by a variable number of
871        * BLEND_STATE_ENTRY structs.
872        */
873       fprintf(ctx->fp, "%s\n", struct_type);
874       ctx_print_group(ctx, state, state_addr, state_map);
875 
876       state_addr += state->dw_length * 4;
877       state_map += state->dw_length * 4;
878 
879       struct_type = "BLEND_STATE_ENTRY";
880       state = intel_spec_find_struct(ctx->spec, struct_type);
881    }
882 
883    count = update_count(ctx, ctx->dynamic_base + state_offset,
884                         ctx->dynamic_base, state->dw_length, count);
885 
886    for (int i = 0; i < count; i++) {
887       fprintf(ctx->fp, "%s %d\n", struct_type, i);
888       ctx_print_group(ctx, state, state_addr, state_map);
889 
890       state_addr += state->dw_length * 4;
891       state_map += state->dw_length * 4;
892    }
893 }
894 
895 static void
896 decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
897                               const char *struct_type, const uint32_t *p,
898                               int count)
899 {
900    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
901 
902    uint32_t state_offset = 0;
903 
904    struct intel_field_iterator iter;
905    intel_field_iterator_init(&iter, inst, p, 0, false);
906    while (intel_field_iterator_next(&iter)) {
907       if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
908          state_offset = iter.raw_value;
909          break;
910       }
911    }
912    decode_dynamic_state(ctx, struct_type, state_offset, count);
913 }
914 
915 static void
916 decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx,
917                                        const uint32_t *p)
918 {
919    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
920    uint32_t state_offset = 0;
921    bool clip = false, sf = false, cc = false;
922    struct intel_field_iterator iter;
923    intel_field_iterator_init(&iter, inst, p, 0, false);
924    while (intel_field_iterator_next(&iter)) {
925       if (!strcmp(iter.name, "CLIP Viewport State Change"))
926          clip = iter.raw_value;
927       if (!strcmp(iter.name, "SF Viewport State Change"))
928          sf = iter.raw_value;
929       if (!strcmp(iter.name, "CC Viewport State Change"))
930          cc = iter.raw_value;
931       else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) {
932          state_offset = iter.raw_value;
933          decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1);
934       }
935       else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) {
936          state_offset = iter.raw_value;
937          decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1);
938       }
939       else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) {
940          state_offset = iter.raw_value;
941          decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1);
942       }
943    }
944 }
945 
946 static void
947 decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx,
948                                           const uint32_t *p)
949 {
950    decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, 4);
951 }
952 
953 static void
954 decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx,
955                                                const uint32_t *p)
956 {
957    decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, 4);
958 }
959 
960 static void
961 decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx,
962                                     const uint32_t *p)
963 {
964    decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, 1);
965 }
966 
967 static void
968 decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
969                                  const uint32_t *p)
970 {
971    if (ctx->devinfo.ver != 6) {
972       decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
973       return;
974    }
975 
976    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
977 
978    uint32_t state_offset = 0;
979    bool blend_change = false, ds_change = false, cc_change = false;
980    struct intel_field_iterator iter;
981    intel_field_iterator_init(&iter, inst, p, 0, false);
982    while (intel_field_iterator_next(&iter)) {
983       if (!strcmp(iter.name, "BLEND_STATE Change"))
984          blend_change = iter.raw_value;
985       else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change"))
986          ds_change = iter.raw_value;
987       else if (!strcmp(iter.name, "Color Calc State Pointer Valid"))
988          cc_change = iter.raw_value;
989       else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) {
990          state_offset = iter.raw_value;
991          decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1);
992       }
993       else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) {
994          state_offset = iter.raw_value;
995          decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1);
996       }
997       else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) {
998          state_offset = iter.raw_value;
999          decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1);
1000       }
1001    }
1002 }
1003 
1004 static void
1005 decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
1006                                  const uint32_t *p)
1007 {
1008    decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1);
1009 }
1010 
1011 static void
1012 decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
1013                                       const uint32_t *p)
1014 {
1015    decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, 1);
1016 }
1017 
1018 static void
1019 decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx,
1020                                           const uint32_t *p)
1021 {
1022    decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, 1);
1023 }
1024 
1025 static void
1026 handle_gt_mode(struct intel_batch_decode_ctx *ctx,
1027                uint32_t reg_addr, uint32_t val)
1028 {
1029    struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr);
1030 
1031    assert(intel_group_get_length(reg, &val) == 1);
1032 
1033    struct intel_field_iterator iter;
1034    intel_field_iterator_init(&iter, reg, &val, 0, false);
1035 
1036    uint32_t bt_alignment = 0;
1037    bool bt_alignment_mask = false;
1038 
1039    while (intel_field_iterator_next(&iter)) {
1040       if (strcmp(iter.name, "Binding Table Alignment") == 0) {
1041          bt_alignment = iter.raw_value;
1042       } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) {
1043          bt_alignment_mask = iter.raw_value;
1044       }
1045    }
1046 
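   /* The alignment bit only takes effect when its mask bit is also written;
    * remember the result so dump_binding_table() can scale pointers for
    * 256B-aligned binding tables.
    */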
1047    if (bt_alignment_mask)
1048       ctx->use_256B_binding_tables = bt_alignment;
1049 }
1050 
1051 struct reg_handler {
1052    const char *name;
1053    void (*handler)(struct intel_batch_decode_ctx *ctx,
1054                    uint32_t reg_addr, uint32_t val);
1055 } reg_handlers[] = {
1056    { "GT_MODE", handle_gt_mode }
1057 };
1058 
1059 static void
1060 decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1061 {
1062    struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1063    const unsigned length = intel_group_get_length(inst, p);
1064    assert(length & 1);
1065    const unsigned nr_regs = (length - 1) / 2;
1066 
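   /* The payload is a sequence of (register offset, value) dword pairs
    * following the command header.
    */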
1067    for (unsigned i = 0; i < nr_regs; i++) {
1068       struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]);
1069       if (reg != NULL) {
1070          fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
1071                 reg->name, reg->register_offset, p[i * 2 + 2]);
1072          ctx_print_group(ctx, reg, reg->register_offset, &p[i * 2 + 2]);
1073 
1074          for (unsigned j = 0; j < ARRAY_SIZE(reg_handlers); j++) {
1075             if (strcmp(reg->name, reg_handlers[j].name) == 0)
1076                reg_handlers[j].handler(ctx, p[i * 2 + 1], p[i * 2 + 2]);
1077          }
1078       }
1079    }
1080 }
1081 
1082 static void
1083 decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1084 {
1085    struct intel_group *strct =
1086       intel_spec_find_struct(ctx->spec, "VS_STATE");
1087    if (strct == NULL) {
1088       fprintf(ctx->fp, "did not find VS_STATE info\n");
1089       return;
1090    }
1091 
1092    struct intel_batch_decode_bo bind_bo =
1093       ctx_get_bo(ctx, true, offset);
1094 
1095    if (bind_bo.map == NULL) {
1096       fprintf(ctx->fp, " vs state unavailable\n");
1097       return;
1098    }
1099 
1100    ctx_print_group(ctx, strct, offset, bind_bo.map);
1101 
1102    uint64_t ksp = 0;
1103    bool is_enabled = true;
1104    struct intel_field_iterator iter;
1105    intel_field_iterator_init(&iter, strct, bind_bo.map, 0, false);
1106    while (intel_field_iterator_next(&iter)) {
1107       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
1108          ksp = iter.raw_value;
1109       } else if (strcmp(iter.name, "Enable") == 0) {
1110          is_enabled = iter.raw_value;
1111       }
1112    }
1113    if (is_enabled) {
1114       ctx_disassemble_program(ctx, ksp, "vertex shader");
1115       fprintf(ctx->fp, "\n");
1116    }
1117 }
1118 
1119 static void
1120 decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1121 {
1122    struct intel_group *strct =
1123       intel_spec_find_struct(ctx->spec, "GS_STATE");
1124    if (strct == NULL) {
1125       fprintf(ctx->fp, "did not find GS_STATE info\n");
1126       return;
1127    }
1128 
1129    struct intel_batch_decode_bo bind_bo =
1130       ctx_get_bo(ctx, true, offset);
1131 
1132    if (bind_bo.map == NULL) {
1133       fprintf(ctx->fp, " gs state unavailable\n");
1134       return;
1135    }
1136 
1137    ctx_print_group(ctx, strct, offset, bind_bo.map);
1138 }
1139 
1140 static void
1141 decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1142 {
1143    struct intel_group *strct =
1144       intel_spec_find_struct(ctx->spec, "CLIP_STATE");
1145    if (strct == NULL) {
1146       fprintf(ctx->fp, "did not find CLIP_STATE info\n");
1147       return;
1148    }
1149 
1150    struct intel_batch_decode_bo bind_bo =
1151       ctx_get_bo(ctx, true, offset);
1152 
1153    if (bind_bo.map == NULL) {
1154       fprintf(ctx->fp, " clip state unavailable\n");
1155       return;
1156    }
1157 
1158    ctx_print_group(ctx, strct, offset, bind_bo.map);
1159 
1160    struct intel_group *vp_strct =
1161       intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT");
1162    if (vp_strct == NULL) {
1163       fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n");
1164       return;
1165    }
1166    uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3;
1167    struct intel_batch_decode_bo vp_bo =
1168       ctx_get_bo(ctx, true, clip_vp_offset);
1169    if (vp_bo.map == NULL) {
1170       fprintf(ctx->fp, " clip vp state unavailable\n");
1171       return;
1172    }
1173    ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map);
1174 }
1175 
1176 static void
1177 decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1178 {
1179    struct intel_group *strct =
1180       intel_spec_find_struct(ctx->spec, "SF_STATE");
1181    if (strct == NULL) {
1182       fprintf(ctx->fp, "did not find SF_STATE info\n");
1183       return;
1184    }
1185 
1186    struct intel_batch_decode_bo bind_bo =
1187       ctx_get_bo(ctx, true, offset);
1188 
1189    if (bind_bo.map == NULL) {
1190       fprintf(ctx->fp, " sf state unavailable\n");
1191       return;
1192    }
1193 
1194    ctx_print_group(ctx, strct, offset, bind_bo.map);
1195 
1196    struct intel_group *vp_strct =
1197       intel_spec_find_struct(ctx->spec, "SF_VIEWPORT");
1198    if (vp_strct == NULL) {
1199       fprintf(ctx->fp, "did not find SF_VIEWPORT info\n");
1200       return;
1201    }
1202 
1203    uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3;
1204    struct intel_batch_decode_bo vp_bo =
1205       ctx_get_bo(ctx, true, sf_vp_offset);
1206    if (vp_bo.map == NULL) {
1207       fprintf(ctx->fp, " sf vp state unavailable\n");
1208       return;
1209    }
1210    ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map);
1211 }
1212 
1213 static void
1214 decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1215 {
1216    struct intel_group *strct =
1217       intel_spec_find_struct(ctx->spec, "WM_STATE");
1218    if (strct == NULL) {
1219       fprintf(ctx->fp, "did not find WM_STATE info\n");
1220       return;
1221    }
1222 
1223    struct intel_batch_decode_bo bind_bo =
1224       ctx_get_bo(ctx, true, offset);
1225 
1226    if (bind_bo.map == NULL) {
1227       fprintf(ctx->fp, " wm state unavailable\n");
1228       return;
1229    }
1230 
1231    ctx_print_group(ctx, strct, offset, bind_bo.map);
1232 
1233    decode_ps_kern(ctx, strct, bind_bo.map);
1234 }
1235 
1236 static void
1237 decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1238 {
1239    struct intel_group *strct =
1240       intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE");
1241    if (strct == NULL) {
1242       fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n");
1243       return;
1244    }
1245 
1246    struct intel_batch_decode_bo bind_bo =
1247       ctx_get_bo(ctx, true, offset);
1248 
1249    if (bind_bo.map == NULL) {
1250       fprintf(ctx->fp, " cc state unavailable\n");
1251       return;
1252    }
1253 
1254    ctx_print_group(ctx, strct, offset, bind_bo.map);
1255 
1256    struct intel_group *vp_strct =
1257       intel_spec_find_struct(ctx->spec, "CC_VIEWPORT");
1258    if (vp_strct == NULL) {
1259       fprintf(ctx->fp, "did not find CC_VIEWPORT info\n");
1260       return;
1261    }
1262    uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3;
1263    struct intel_batch_decode_bo vp_bo =
1264       ctx_get_bo(ctx, true, cc_vp_offset);
1265    if (vp_bo.map == NULL) {
1266       fprintf(ctx->fp, " cc vp state unavailable\n");
1267       return;
1268    }
1269    ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map);
1270 }
1271 static void
1272 decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1273 {
1274    fprintf(ctx->fp, "VS State Table:\n");
1275    decode_vs_state(ctx, p[1]);
1276    if (p[2] & 1) {
1277       fprintf(ctx->fp, "GS State Table:\n");
1278       decode_gs_state(ctx, p[2] & ~1);
1279    }
1280    fprintf(ctx->fp, "Clip State Table:\n");
1281    decode_clip_state(ctx, p[3] & ~1);
1282    fprintf(ctx->fp, "SF State Table:\n");
1283    decode_sf_state(ctx, p[4]);
1284    fprintf(ctx->fp, "WM State Table:\n");
1285    decode_wm_state(ctx, p[5]);
1286    fprintf(ctx->fp, "CC State Table:\n");
1287    decode_cc_state(ctx, p[6]);
1288 }
1289 
1290 static void
1291 decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1292 {
1293    decode_dynamic_state_pointers(ctx, "CPS_STATE", p, 1);
1294 }
1295 
1296 struct custom_decoder {
1297    const char *cmd_name;
1298    void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p);
1299 } custom_decoders[] = {
1300    { "STATE_BASE_ADDRESS", handle_state_base_address },
1301    { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc },
1302    { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
1303    { "COMPUTE_WALKER", handle_compute_walker },
1304    { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
1305    { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
1306    { "3DSTATE_VS", decode_single_ksp },
1307    { "3DSTATE_GS", decode_single_ksp },
1308    { "3DSTATE_DS", decode_single_ksp },
1309    { "3DSTATE_HS", decode_single_ksp },
1310    { "3DSTATE_PS", decode_ps_kernels },
1311    { "3DSTATE_WM", decode_ps_kernels },
1312    { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
1313    { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
1314    { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
1315    { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
1316    { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
1317    { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },
1318 
1319    { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers },
1320    { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
1321    { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
1322    { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
1323    { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
1324    { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },
1325 
1326    { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
1327    { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
1328    { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
1329    { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
1330    { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
1331    { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },
1332 
1333    { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers },
1334    { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
1335    { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
1336    { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
1337    { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
1338    { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
1339    { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
1340    { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
1341    { "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
1342    { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers },
1343    { "3DSTATE_CPS_POINTERS", decode_cps_pointers },
1344    { "CONSTANT_BUFFER", decode_gfx4_constant_buffer },
1345 };
1346 
1347 void
1348 intel_print_batch(struct intel_batch_decode_ctx *ctx,
1349                   const uint32_t *batch, uint32_t batch_size,
1350                   uint64_t batch_addr, bool from_ring)
1351 {
1352    const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
1353    int length;
1354    struct intel_group *inst;
1355    const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : "";
1356 
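   /* Guard against runaway recursion through chained or self-referencing
    * MI_BATCH_BUFFER_START commands.
    */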
1357    if (ctx->n_batch_buffer_start >= 100) {
1358       fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n",
1359               (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1360               (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0,
1361               reset_color);
1362       return;
1363    }
1364 
1365    ctx->n_batch_buffer_start++;
1366 
1367    for (p = batch; p < end; p += length) {
1368       inst = intel_ctx_find_instruction(ctx, p);
1369       length = intel_group_get_length(inst, p);
1370       assert(inst == NULL || length > 0);
1371       length = MAX2(1, length);
1372 
1373       uint64_t offset;
1374       if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS)
1375          offset = batch_addr + ((char *)p - (char *)batch);
1376       else
1377          offset = 0;
1378 
1379       if (inst == NULL) {
1380          fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n",
1381                  (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1382                  offset, p[0], reset_color);
1383 
1384          for (int i=1; i < length; i++) {
1385             fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n",
1386                  (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
1387                  offset + i * 4, p[i], reset_color);
1388          }
1389 
1390          continue;
1391       }
1392 
1393       const char *color;
1394       const char *inst_name = intel_group_get_name(inst);
1395       if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) {
1396          reset_color = NORMAL;
1397          if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1398             if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
1399                 strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
1400                color = GREEN_HEADER;
1401             else
1402                color = BLUE_HEADER;
1403          } else {
1404             color = NORMAL;
1405          }
1406       } else {
1407          color = "";
1408          reset_color = "";
1409       }
1410 
1411       fprintf(ctx->fp, "%s0x%08"PRIx64"%s:  0x%08x:  %-80s%s\n", color, offset,
1412               ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0],
1413               inst_name, reset_color);
1414 
1415       if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
1416          ctx_print_group(ctx, inst, offset, p);
1417 
1418          for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) {
1419             if (strcmp(inst_name, custom_decoders[i].cmd_name) == 0) {
1420                custom_decoders[i].decode(ctx, p);
1421                break;
1422             }
1423          }
1424       }
1425 
1426       if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0) {
1427          uint64_t next_batch_addr = 0;
1428          bool ppgtt = false;
1429          bool second_level = false;
1430          bool predicate = false;
1431          struct intel_field_iterator iter;
1432          intel_field_iterator_init(&iter, inst, p, 0, false);
1433          while (intel_field_iterator_next(&iter)) {
1434             if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
1435                next_batch_addr = iter.raw_value;
1436             } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
1437                second_level = iter.raw_value;
1438             } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
1439                ppgtt = iter.raw_value;
1440             } else if (strcmp(iter.name, "Predication Enable") == 0) {
1441                predicate = iter.raw_value;
1442             }
1443          }
1444 
1445          if (!predicate) {
1446             struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr);
1447 
1448             if (next_batch.map == NULL) {
1449                fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
1450                        next_batch_addr);
1451             } else {
1452                intel_print_batch(ctx, next_batch.map, next_batch.size,
1453                                  next_batch.addr, false);
1454             }
1455             if (second_level) {
1456                /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
1457                 * like a subroutine call.  Commands that come afterwards get
1458                 * processed once the 2nd level batch buffer returns with
1459                 * MI_BATCH_BUFFER_END.
1460                 */
1461                continue;
1462             } else if (!from_ring) {
1463                /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
1464                 * like a goto.  Nothing after it will ever get processed, so
1465                 * we simply stop decoding this batch to keep the recursion
1466                 * from growing.
1467                 */
1468                break;
1469             }
1470          }
1471       } else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) {
1472          break;
1473       }
1474    }
1475 
1476    ctx->n_batch_buffer_start--;
1477 }
1478