1 /*
2  * Copyright © 2014 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <stdbool.h>
25 #include <stdio.h>
26 
27 #include "vc4_qpu.h"
28 #include "vc4_qpu_defines.h"
29 
30 static const char *qpu_add_opcodes[] = {
31         [QPU_A_NOP] = "nop",
32         [QPU_A_FADD] = "fadd",
33         [QPU_A_FSUB] = "fsub",
34         [QPU_A_FMIN] = "fmin",
35         [QPU_A_FMAX] = "fmax",
36         [QPU_A_FMINABS] = "fminabs",
37         [QPU_A_FMAXABS] = "fmaxabs",
38         [QPU_A_FTOI] = "ftoi",
39         [QPU_A_ITOF] = "itof",
40         [QPU_A_ADD] = "add",
41         [QPU_A_SUB] = "sub",
42         [QPU_A_SHR] = "shr",
43         [QPU_A_ASR] = "asr",
44         [QPU_A_ROR] = "ror",
45         [QPU_A_SHL] = "shl",
46         [QPU_A_MIN] = "min",
47         [QPU_A_MAX] = "max",
48         [QPU_A_AND] = "and",
49         [QPU_A_OR] = "or",
50         [QPU_A_XOR] = "xor",
51         [QPU_A_NOT] = "not",
52         [QPU_A_CLZ] = "clz",
53         [QPU_A_V8ADDS] = "v8adds",
54         [QPU_A_V8SUBS] = "v8subs",
55 };
56 
57 static const char *qpu_mul_opcodes[] = {
58         [QPU_M_NOP] = "nop",
59         [QPU_M_FMUL] = "fmul",
60         [QPU_M_MUL24] = "mul24",
61         [QPU_M_V8MULD] = "v8muld",
62         [QPU_M_V8MIN] = "v8min",
63         [QPU_M_V8MAX] = "v8max",
64         [QPU_M_V8ADDS] = "v8adds",
65         [QPU_M_V8SUBS] = "v8subs",
66 };
67 
68 static const char *qpu_sig[] = {
69         [QPU_SIG_SW_BREAKPOINT] = "sig_brk",
70         [QPU_SIG_NONE] = "",
71         [QPU_SIG_THREAD_SWITCH] = "sig_switch",
72         [QPU_SIG_PROG_END] = "sig_end",
73         [QPU_SIG_WAIT_FOR_SCOREBOARD] = "sig_wait_score",
74         [QPU_SIG_SCOREBOARD_UNLOCK] = "sig_unlock_score",
75         [QPU_SIG_LAST_THREAD_SWITCH] = "sig_thread_switch",
76         [QPU_SIG_COVERAGE_LOAD] = "sig_coverage_load",
77         [QPU_SIG_COLOR_LOAD] = "sig_color_load",
78         [QPU_SIG_COLOR_LOAD_END] = "sig_color_load_end",
79         [QPU_SIG_LOAD_TMU0] = "load_tmu0",
80         [QPU_SIG_LOAD_TMU1] = "load_tmu1",
81         [QPU_SIG_ALPHA_MASK_LOAD] = "sig_alpha_mask_load",
82         [QPU_SIG_SMALL_IMM] = "sig_small_imm",
83         [QPU_SIG_LOAD_IMM] = "sig_load_imm",
84         [QPU_SIG_BRANCH] = "sig_branch",
85 };
86 
87 static const char *qpu_pack_mul[] = {
88         [QPU_PACK_MUL_NOP] = "",
89         [QPU_PACK_MUL_8888] = ".8888",
90         [QPU_PACK_MUL_8A] = ".8a",
91         [QPU_PACK_MUL_8B] = ".8b",
92         [QPU_PACK_MUL_8C] = ".8c",
93         [QPU_PACK_MUL_8D] = ".8d",
94 };
95 
96 /* The QPU unpack for A and R4 files can be described the same, it's just that
97  * the R4 variants are convert-to-float only, with no int support.
98  */
99 static const char *qpu_unpack[] = {
100         [QPU_UNPACK_NOP] = "",
101         [QPU_UNPACK_16A] = "16a",
102         [QPU_UNPACK_16B] = "16b",
103         [QPU_UNPACK_8D_REP] = "8d_rep",
104         [QPU_UNPACK_8A] = "8a",
105         [QPU_UNPACK_8B] = "8b",
106         [QPU_UNPACK_8C] = "8c",
107         [QPU_UNPACK_8D] = "8d",
108 };
109 
/* Names of the special read addresses on the A file.
 *
 * Indexed by (raddr - 32), as done in print_alu_src().  Slots that are not
 * listed stay NULL and are printed as "???" by DESC().  Explicit index
 * designators are used so the position of each name does not depend on
 * counting filler entries.
 */
static const char *const special_read_a[] = {
        [0] = "uni",
        [3] = "vary",
        [6] = "elem",
        [7] = "nop",
        [9] = "x_pix",
        [10] = "ms_flags",
        [16] = "vpm_read",
        [17] = "vpm_ld_busy",
        [18] = "vpm_ld_wait",
        [19] = "mutex_acq",
};
132 
/* Names of the special read addresses on the B file.
 *
 * Indexed by (raddr - 32), as done in print_alu_src().  Slots that are not
 * listed stay NULL and are printed as "???" by DESC().  Explicit index
 * designators are used so the position of each name does not depend on
 * counting filler entries.
 */
static const char *const special_read_b[] = {
        [0] = "uni",
        [3] = "vary",
        [6] = "qpu",
        [7] = "nop",
        [9] = "y_pix",
        [10] = "rev_flag",
        [16] = "vpm_read",
        [17] = "vpm_st_busy",
        [18] = "vpm_st_wait",
        [19] = "mutex_acq",
};
155 
156 /**
157  * This has the B-file descriptions for register writes.
158  *
159  * Since only a couple of regs are different between A and B, the A overrides
160  * are in get_special_write_desc().
161  */
162 static const char *special_write[] = {
163         [QPU_W_ACC0] = "r0",
164         [QPU_W_ACC1] = "r1",
165         [QPU_W_ACC2] = "r2",
166         [QPU_W_ACC3] = "r3",
167         [QPU_W_TMU_NOSWAP] = "tmu_noswap",
168         [QPU_W_ACC5] = "r5",
169         [QPU_W_HOST_INT] = "host_int",
170         [QPU_W_NOP] = "nop",
171         [QPU_W_UNIFORMS_ADDRESS] = "uniforms_addr",
172         [QPU_W_QUAD_XY] = "quad_y",
173         [QPU_W_MS_FLAGS] = "ms_flags",
174         [QPU_W_TLB_STENCIL_SETUP] = "tlb_stencil_setup",
175         [QPU_W_TLB_Z] = "tlb_z",
176         [QPU_W_TLB_COLOR_MS] = "tlb_color_ms",
177         [QPU_W_TLB_COLOR_ALL] = "tlb_color_all",
178         [QPU_W_VPM] = "vpm",
179         [QPU_W_VPMVCD_SETUP] = "vw_setup",
180         [QPU_W_VPM_ADDR] = "vw_addr",
181         [QPU_W_MUTEX_RELEASE] = "mutex_release",
182         [QPU_W_SFU_RECIP] = "sfu_recip",
183         [QPU_W_SFU_RECIPSQRT] = "sfu_recipsqrt",
184         [QPU_W_SFU_EXP] = "sfu_exp",
185         [QPU_W_SFU_LOG] = "sfu_log",
186         [QPU_W_TMU0_S] = "tmu0_s",
187         [QPU_W_TMU0_T] = "tmu0_t",
188         [QPU_W_TMU0_R] = "tmu0_r",
189         [QPU_W_TMU0_B] = "tmu0_b",
190         [QPU_W_TMU1_S] = "tmu1_s",
191         [QPU_W_TMU1_T] = "tmu1_t",
192         [QPU_W_TMU1_R] = "tmu1_r",
193         [QPU_W_TMU1_B] = "tmu1_b",
194 };
195 
196 static const char *qpu_pack_a[] = {
197         [QPU_PACK_A_NOP] = "",
198         [QPU_PACK_A_16A] = ".16a",
199         [QPU_PACK_A_16B] = ".16b",
200         [QPU_PACK_A_8888] = ".8888",
201         [QPU_PACK_A_8A] = ".8a",
202         [QPU_PACK_A_8B] = ".8b",
203         [QPU_PACK_A_8C] = ".8c",
204         [QPU_PACK_A_8D] = ".8d",
205 
206         [QPU_PACK_A_32_SAT] = ".sat",
207         [QPU_PACK_A_16A_SAT] = ".16a.sat",
208         [QPU_PACK_A_16B_SAT] = ".16b.sat",
209         [QPU_PACK_A_8888_SAT] = ".8888.sat",
210         [QPU_PACK_A_8A_SAT] = ".8a.sat",
211         [QPU_PACK_A_8B_SAT] = ".8b.sat",
212         [QPU_PACK_A_8C_SAT] = ".8c.sat",
213         [QPU_PACK_A_8D_SAT] = ".8d.sat",
214 };
215 
216 static const char *qpu_cond[] = {
217         [QPU_COND_NEVER] = ".never",
218         [QPU_COND_ALWAYS] = "",
219         [QPU_COND_ZS] = ".zs",
220         [QPU_COND_ZC] = ".zc",
221         [QPU_COND_NS] = ".ns",
222         [QPU_COND_NC] = ".nc",
223         [QPU_COND_CS] = ".cs",
224         [QPU_COND_CC] = ".cc",
225 };
226 
227 static const char *qpu_cond_branch[] = {
228         [QPU_COND_BRANCH_ALL_ZS] = ".all_zs",
229         [QPU_COND_BRANCH_ALL_ZC] = ".all_zc",
230         [QPU_COND_BRANCH_ANY_ZS] = ".any_zs",
231         [QPU_COND_BRANCH_ANY_ZC] = ".any_zc",
232         [QPU_COND_BRANCH_ALL_NS] = ".all_ns",
233         [QPU_COND_BRANCH_ALL_NC] = ".all_nc",
234         [QPU_COND_BRANCH_ANY_NS] = ".any_ns",
235         [QPU_COND_BRANCH_ANY_NC] = ".any_nc",
236         [QPU_COND_BRANCH_ALL_CS] = ".all_cs",
237         [QPU_COND_BRANCH_ALL_CC] = ".all_cc",
238         [QPU_COND_BRANCH_ANY_CS] = ".any_cs",
239         [QPU_COND_BRANCH_ANY_CC] = ".any_cc",
240         [QPU_COND_BRANCH_ALWAYS] = "",
241 };
242 
/* Looks up a description string in a table.
 *
 * Evaluates to array[index], or to "???" when index is past the end of the
 * table or the slot is NULL.  `array` must be a real array (not a pointer),
 * since its length is taken with ARRAY_SIZE().  `index` is parenthesized so
 * that any expression can be passed, but it is evaluated more than once and
 * therefore must not have side effects.
 */
#define DESC(array, index)                                        \
        (((index) >= ARRAY_SIZE(array) || !(array)[(index)]) ?     \
         "???" : (array)[(index)])
246 
247 static const char *
get_special_write_desc(int reg,bool is_a)248 get_special_write_desc(int reg, bool is_a)
249 {
250         if (is_a) {
251                 switch (reg) {
252                 case QPU_W_QUAD_XY:
253                         return "quad_x";
254                 case QPU_W_VPMVCD_SETUP:
255                         return "vr_setup";
256                 case QPU_W_VPM_ADDR:
257                         return "vr_addr";
258                 }
259         }
260 
261         return special_write[reg];
262 }
263 
264 void
vc4_qpu_disasm_pack_mul(FILE * out,uint32_t pack)265 vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack)
266 {
267         fprintf(out, "%s", DESC(qpu_pack_mul, pack));
268 }
269 
270 void
vc4_qpu_disasm_pack_a(FILE * out,uint32_t pack)271 vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack)
272 {
273         fprintf(out, "%s", DESC(qpu_pack_a, pack));
274 }
275 
276 void
vc4_qpu_disasm_unpack(FILE * out,uint32_t unpack)277 vc4_qpu_disasm_unpack(FILE *out, uint32_t unpack)
278 {
279         if (unpack != QPU_UNPACK_NOP)
280                 fprintf(out, ".%s", DESC(qpu_unpack, unpack));
281 }
282 
283 void
vc4_qpu_disasm_cond(FILE * out,uint32_t cond)284 vc4_qpu_disasm_cond(FILE *out, uint32_t cond)
285 {
286         fprintf(out, "%s", DESC(qpu_cond, cond));
287 }
288 
289 void
vc4_qpu_disasm_cond_branch(FILE * out,uint32_t cond)290 vc4_qpu_disasm_cond_branch(FILE *out, uint32_t cond)
291 {
292         fprintf(out, "%s", DESC(qpu_cond_branch, cond));
293 }
294 
295 static void
print_alu_dst(uint64_t inst,bool is_mul)296 print_alu_dst(uint64_t inst, bool is_mul)
297 {
298         bool is_a = is_mul == ((inst & QPU_WS) != 0);
299         uint32_t waddr = (is_mul ?
300                           QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
301                           QPU_GET_FIELD(inst, QPU_WADDR_ADD));
302         const char *file = is_a ? "a" : "b";
303         uint32_t pack = QPU_GET_FIELD(inst, QPU_PACK);
304 
305         if (waddr <= 31)
306                 fprintf(stderr, "r%s%d", file, waddr);
307         else if (get_special_write_desc(waddr, is_a))
308                 fprintf(stderr, "%s", get_special_write_desc(waddr, is_a));
309         else
310                 fprintf(stderr, "%s%d?", file, waddr);
311 
312         if (is_mul && (inst & QPU_PM)) {
313                 vc4_qpu_disasm_pack_mul(stderr, pack);
314         } else if (is_a && !(inst & QPU_PM)) {
315                 vc4_qpu_disasm_pack_a(stderr, pack);
316         }
317 }
318 
319 static void
print_alu_src(uint64_t inst,uint32_t mux,bool is_mul)320 print_alu_src(uint64_t inst, uint32_t mux, bool is_mul)
321 {
322         bool is_a = mux != QPU_MUX_B;
323         const char *file = is_a ? "a" : "b";
324         uint32_t raddr = (is_a ?
325                           QPU_GET_FIELD(inst, QPU_RADDR_A) :
326                           QPU_GET_FIELD(inst, QPU_RADDR_B));
327         uint32_t unpack = QPU_GET_FIELD(inst, QPU_UNPACK);
328         bool has_si = QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM;
329         uint32_t si = QPU_GET_FIELD(inst, QPU_SMALL_IMM);
330 
331         if (mux <= QPU_MUX_R5) {
332                 fprintf(stderr, "r%d", mux);
333                 if (has_si && is_mul && si >= QPU_SMALL_IMM_MUL_ROT + 1)
334                         fprintf(stderr, "+%d", si - QPU_SMALL_IMM_MUL_ROT);
335         } else if (!is_a && has_si) {
336                 if (si <= 15)
337                         fprintf(stderr, "%d", si);
338                 else if (si <= 31)
339                         fprintf(stderr, "%d", -16 + (si - 16));
340                 else if (si <= 39)
341                         fprintf(stderr, "%.1f", (float)(1 << (si - 32)));
342                 else if (si <= 47)
343                         fprintf(stderr, "%f", 1.0f / (1 << (48 - si)));
344                 else
345                         fprintf(stderr, "<bad imm %d>", si);
346         } else if (raddr <= 31)
347                 fprintf(stderr, "r%s%d", file, raddr);
348         else {
349                 if (is_a)
350                         fprintf(stderr, "%s", DESC(special_read_a, raddr - 32));
351                 else
352                         fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
353         }
354 
355         if (((mux == QPU_MUX_A && !(inst & QPU_PM)) ||
356              (mux == QPU_MUX_R4 && (inst & QPU_PM)))) {
357                 vc4_qpu_disasm_unpack(stderr, unpack);
358         }
359 }
360 
361 static void
print_add_op(uint64_t inst)362 print_add_op(uint64_t inst)
363 {
364         uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
365         uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_ADD);
366         bool is_mov = (op_add == QPU_A_OR &&
367                        QPU_GET_FIELD(inst, QPU_ADD_A) ==
368                        QPU_GET_FIELD(inst, QPU_ADD_B));
369 
370         if (is_mov)
371                 fprintf(stderr, "mov");
372         else
373                 fprintf(stderr, "%s", DESC(qpu_add_opcodes, op_add));
374 
375         if ((inst & QPU_SF) && op_add != QPU_A_NOP)
376                 fprintf(stderr, ".sf");
377 
378         if (op_add != QPU_A_NOP)
379                 vc4_qpu_disasm_cond(stderr, cond);
380 
381         fprintf(stderr, " ");
382         print_alu_dst(inst, false);
383         fprintf(stderr, ", ");
384 
385         print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_A), false);
386 
387         if (!is_mov) {
388                 fprintf(stderr, ", ");
389 
390                 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_B), false);
391         }
392 }
393 
394 static void
print_mul_op(uint64_t inst)395 print_mul_op(uint64_t inst)
396 {
397         uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
398         uint32_t op_mul = QPU_GET_FIELD(inst, QPU_OP_MUL);
399         uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_MUL);
400         bool is_mov = (op_mul == QPU_M_V8MIN &&
401                        QPU_GET_FIELD(inst, QPU_MUL_A) ==
402                        QPU_GET_FIELD(inst, QPU_MUL_B));
403 
404         if (is_mov)
405                 fprintf(stderr, "mov");
406         else
407                 fprintf(stderr, "%s", DESC(qpu_mul_opcodes, op_mul));
408 
409         if ((inst & QPU_SF) && op_add == QPU_A_NOP)
410                 fprintf(stderr, ".sf");
411 
412         if (op_mul != QPU_M_NOP)
413                 vc4_qpu_disasm_cond(stderr, cond);
414 
415         fprintf(stderr, " ");
416         print_alu_dst(inst, true);
417         fprintf(stderr, ", ");
418 
419         print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_A), true);
420 
421         if (!is_mov) {
422                 fprintf(stderr, ", ");
423                 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_B), true);
424         }
425 }
426 
427 static void
print_load_imm(uint64_t inst)428 print_load_imm(uint64_t inst)
429 {
430         uint32_t imm = inst;
431         uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
432         uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
433         uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
434         uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
435 
436         fprintf(stderr, "load_imm ");
437 
438         print_alu_dst(inst, false);
439         if (waddr_add != QPU_W_NOP)
440                 vc4_qpu_disasm_cond(stderr, cond_add);
441         fprintf(stderr, ", ");
442 
443         print_alu_dst(inst, true);
444         if (waddr_mul != QPU_W_NOP)
445                 vc4_qpu_disasm_cond(stderr, cond_mul);
446         fprintf(stderr, ", ");
447 
448         fprintf(stderr, "0x%08x (%f)", imm, uif(imm));
449 }
450 
451 void
vc4_qpu_disasm(const uint64_t * instructions,int num_instructions)452 vc4_qpu_disasm(const uint64_t *instructions, int num_instructions)
453 {
454         for (int i = 0; i < num_instructions; i++) {
455                 uint64_t inst = instructions[i];
456                 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
457 
458                 switch (sig) {
459                 case QPU_SIG_BRANCH:
460                         fprintf(stderr, "branch");
461                         vc4_qpu_disasm_cond_branch(stderr,
462                                                    QPU_GET_FIELD(inst,
463                                                                  QPU_BRANCH_COND));
464 
465                         fprintf(stderr, " %d", (uint32_t)inst);
466                         break;
467 
468                 case QPU_SIG_LOAD_IMM:
469                         print_load_imm(inst);
470                         break;
471                 default:
472                         if (sig != QPU_SIG_NONE)
473                                 fprintf(stderr, "%s ", DESC(qpu_sig, sig));
474                         print_add_op(inst);
475                         fprintf(stderr, " ; ");
476                         print_mul_op(inst);
477                         break;
478                 }
479 
480                 if (num_instructions != 1)
481                         fprintf(stderr, "\n");
482         }
483 }
484