1 /*
2  * Copyright © 2016 Broadcom
3  * Copyright © 2020 Google LLC
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 /* Unit test for disassembly of instructions.
26  *
27  * The goal is to take instructions we've seen the blob produce, and test that
28  * we can disassemble them correctly.  For the next person investigating the
29  * behavior of this instruction, please include the testcase it was generated
30  * from, and the qcom disassembly as a comment if it differs from what we
31  * produce.
32  */
33 
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include "util/macros.h"
38 
39 #include "ir3.h"
40 #include "ir3_assembler.h"
41 #include "ir3_shader.h"
42 
43 #include "isa/isa.h"
44 
45 /* clang-format off */
/*
 * Build one tests[] entry.  `i` is the raw instruction written as two hex
 * words joined by '_' and is stringified (#i) into .instr; `d` is the
 * expected disassembly text.  Any extra arguments are appended as further
 * designated initializers (e.g. .parse_fail=true).  INSTR_5XX tags the entry
 * with gpu_id 540, INSTR_6XX with gpu_id 630.
 */
46 #define INSTR_5XX(i, d, ...) { .gpu_id = 540, .instr = #i, .expected = d, __VA_ARGS__ }
47 #define INSTR_6XX(i, d, ...) { .gpu_id = 630, .instr = #i, .expected = d, __VA_ARGS__ }
48 /* clang-format on */
49 
/*
 * One disassemble/assemble round-trip test vector.  main() decodes .instr,
 * compares the text against .expected, then assembles .expected and compares
 * the resulting binary against the original encoding.
 */
50 static const struct test {
51    int gpu_id; /* passed to the decoder; gpu_id / 100 selects the compiler instance */
52    const char *instr; /* hex text "hhhhhhhh_llllllll": high dword at offset 0, low dword at offset 9 */
53    const char *expected; /* exact disassembly expected (after trailing newlines are trimmed) */
54    /**
55     * Do we expect asm parse fail (ie. for things not (yet) supported by
56     * ir3_parser.y)
57     */
58    bool parse_fail;
59 } tests[] = {
60    /* clang-format off */
61 	/* cat0 */
62 	INSTR_6XX(00000000_00000000, "nop"),
63 	INSTR_6XX(00000200_00000000, "(rpt2)nop"),
64 	INSTR_6XX(03000000_00000000, "end"),
65 	INSTR_6XX(00800000_00000004, "br p0.x, #4"),
66 	INSTR_6XX(00900000_00000003, "br !p0.x, #3"),
67 	INSTR_6XX(03820000_00000015, "shps #21"), /* emit */
68 	INSTR_6XX(04021000_00000000, "(ss)shpe"), /* cut */
69 	INSTR_6XX(02820000_00000014, "getone #20"), /* kill p0.x */
70 	INSTR_6XX(00906020_00000007, "brao !p0.x, !p0.y, #7"),
71 	INSTR_6XX(00804040_00000003, "braa p0.x, p0.y, #3"),
72 	INSTR_6XX(07820000_00000000, "prede"),
73 	INSTR_6XX(00800063_0000001e, "brac.3 #30"),
74 	INSTR_6XX(06820000_00000000, "predt p0.x"),
75 	INSTR_6XX(07020000_00000000, "predf p0.x"),
76 	INSTR_6XX(07820000_00000000, "prede"),
77 
78 	/* cat1 */
79 	INSTR_6XX(20244000_00000020, "mov.f32f32 r0.x, c8.x"),
80 	INSTR_6XX(20200000_00000020, "mov.f16f16 hr0.x, hc8.x"),
81 	INSTR_6XX(20150000_00000000, "cov.s32s16 hr0.x, r0.x"),
82 	INSTR_6XX(20156004_00000c11, "(ul)mov.s32s32 r1.x, c<a0.x + 17>"),
83 	INSTR_6XX(201100f4_00000000, "mova a0.x, hr0.x"),
84 	INSTR_6XX(20244905_00000410, "(rpt1)mov.f32f32 r1.y, (r)c260.x"),
85 	INSTR_6XX(20174004_00000008, "mov.s32s32 r<a0.x + 4>, r2.x"),
86 	INSTR_6XX(20130000_00000005, "mov.s16s16 hr<a0.x>, hr1.y"),
87 	INSTR_6XX(20110004_00000800, "mov.s16s16 hr1.x, hr<a0.x>"),
88 	/* dEQP-VK.subgroups.ballot.compute.compute */
89 	INSTR_6XX(260cc3c0_00000000, "movmsk.w128 r48.x"), /* movmsk.w128 sr48.x */
90 
91 	INSTR_6XX(240cc004_00030201, "swz.u32u32 r1.x, r0.w, r0.y, r0.z"),
92 	INSTR_6XX(2400c105_04030201, "gat.f16u32 r1.y, hr0.y, hr0.z, hr0.w, hr1.x"),
93 	INSTR_6XX(240c0205_04030201, "sct.u32f16 hr1.y, hr0.z, hr0.w, hr1.x, r0.y"),
94 	INSTR_6XX(2400c205_04030201, "sct.f16u32 r1.y, r0.z, r0.w, r1.x, hr0.y"),
95 
96 	INSTR_6XX(20510005_0000ffff, "mov.s16s16 hr1.y, -1"),
97 	INSTR_6XX(20400005_00003900, "mov.f16f16 hr1.y, h(0.625000)"),
98 	INSTR_6XX(20400006_00003800, "mov.f16f16 hr1.z, h(0.500000)"),
99 	INSTR_6XX(204880f5_00000000, "mova1 a1.x, 0"),
100 
101 	/* cat2 */
102 	INSTR_6XX(40104002_0c210001, "add.f hr0.z, r0.y, c<a0.x + 33>"),
103 	INSTR_6XX(40b80804_10408004, "(nop3) cmps.f.lt r1.x, (abs)r1.x, c16.x"),
104 	INSTR_6XX(47308a02_00002000, "(rpt2)bary.f (ei)r0.z, (r)0, r0.x"),
105 	INSTR_6XX(43480801_00008001, "(nop3) absneg.s hr0.y, (abs)hr0.y"),
106 	INSTR_6XX(50600004_2c010004, "(sy)mul.f hr1.x, hr1.x, h(0.5)"),
107 	INSTR_6XX(42280807_27ff0000, "(nop3) add.s hr1.w, hr0.x, h(-1)"),
108 	INSTR_6XX(40a500f8_2c000004, "cmps.f.ne p0.x, hr1.x, h(0.0)"),
109 	INSTR_6XX(438000f8_20010009, "and.b p0.x, hr2.y, h(1)"),
110 	INSTR_6XX(438000f9_00020001, "and.b p0.y, hr0.y, hr0.z"),
111 	INSTR_6XX(40080902_50200006, "(rpt1)add.f hr0.z, (r)hr1.z, (neg)(r)hc8.x"),
112 	INSTR_6XX(42380c01_00040001, "(sat)(nop3) add.s r0.y, r0.y, r1.x"),
113 	INSTR_6XX(42480000_48801086, "(nop2) sub.u hr0.x, hc33.z, (neg)hr<a0.x + 128>"),
114 	INSTR_6XX(46b00001_00001020, "clz.b r0.y, c8.x"),
115 	INSTR_6XX(46700009_00000009, "bfrev.b r2.y, r2.y"),
116 
117 	/* cat3 */
118 	INSTR_6XX(66000000_10421041, "sel.f16 hr0.x, hc16.y, hr0.x, hc16.z"),
119 	INSTR_6XX(64848109_109a9099, "(rpt1)sel.b32 r2.y, c38.y, (r)r2.y, c38.z"),
120 	INSTR_6XX(64810904_30521036, "(rpt1)sel.b32 r1.x, (r)c13.z, r0.z, (r)c20.z"),
121 	INSTR_6XX(64818902_20041032, "(rpt1)sel.b32 r0.z, (r)c12.z, r0.w, (r)r1.x"),
122 	INSTR_6XX(63820005_10315030, "mad.f32 r1.y, (neg)c12.x, r1.x, c12.y"),
123 	INSTR_6XX(62050009_00091000, "mad.u24 r2.y, c0.x, r2.z, r2.y"),
124 	INSTR_6XX(61828008_00081033, "madsh.m16 r2.x, c12.w, r1.y, r2.x"),
125 	INSTR_6XX(65900820_100cb008, "(nop3) shlg.b16 hr8.x, 8, hr8.x, 12"), /* (nop3) shlg.b16 hr8.x, (r)8, (r)hr8.x, 12; */
126 	INSTR_6XX(65ae085c_0002a001, "(nop3) shlg.b16 hr23.x, hr0.y, hr23.x, hr0.z"), /* not seen in blob */
127 	INSTR_6XX(65900820_0c0aac05, "(nop3) shlg.b16 hr8.x, hc<a0.x + 5>, hr8.x, hc<a0.x + 10>"), /* not seen in blob */
128 
129 	/* cat4 */
130 	INSTR_6XX(8010000a_00000003, "rcp r2.z, r0.w"),
131 
132 	/* cat5 */
133 	/* dEQP-VK.glsl.derivate.dfdx.uniform_if.float_mediump */
134 	INSTR_6XX(a3801102_00000001, "dsx (f32)(x)r0.z, r0.x"), /* dsx (f32)(xOOO)r0.z, r0.x */
135 	/* dEQP-VK.glsl.derivate.dfdy.uniform_if.float_mediump */
136 	INSTR_6XX(a3c01102_00000001, "dsy (f32)(x)r0.z, r0.x"), /* dsy (f32)(xOOO)r0.z, r0.x */
137 	/* dEQP-VK.glsl.derivate.dfdxfine.uniform_loop.float_highp */
138 	INSTR_6XX(a6001105_00000001, "dsxpp.1 (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */
139 	INSTR_6XX(a6201105_00000001, "dsxpp.1.p (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */
140 
141 	INSTR_6XX(a2802f00_00000001, "getsize (u16)(xyzw)hr0.x, r0.x, t#0"),
142 	INSTR_6XX(a0c89f04_c4600005, "sam.base1 (f32)(xyzw)r1.x, r0.z, s#3, t#2"),  /* sam.s2en.mode6.base1 (f32)(xyzw)r1.x, r0.z, 35 */
143 	INSTR_6XX(a1c85f00_c0200005, "getlod.base0 (s32)(xyzw)r0.x, r0.z, s#1, t#0"),  /* getlod.s2en.mode6.base0 (s32)(xyzw)r0.x, r0.z, 1 */
144 	INSTR_6XX(a1000f00_00000004, "samb (f16)(xyzw)hr0.x, hr0.z, hr0.x, s#0, t#0"),
145 	INSTR_6XX(a1000f00_00000003, "samb (f16)(xyzw)hr0.x, r0.y, r0.x, s#0, t#0"),
146 	INSTR_6XX(a0c00f00_04400002, "sam (f16)(xyzw)hr0.x, hr0.y, s#2, t#2"),
147 	INSTR_6XX(a6c02f00_00000000, "rgetinfo (u16)(xyzw)hr0.x"),
148 	INSTR_6XX(a3482f08_c0000000, "getinfo.base0 (u16)(xyzw)hr2.x, t#0"),
149 	/* dEQP-GLES31.functional.texture.texture_buffer.render.as_fragment_texture.buffer_size_65536 */
150 	INSTR_5XX(a2c03102_00000000, "getbuf (u32)(x)r0.z, t#0"),
151 	INSTR_6XX(a0c81f00_e0200005, "sam.base0 (f32)(xyzw)r0.x, r0.z, s#1, a1.x"),
152 
153 
154 	/* cat6 */
155 
156 	INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), /* ldgb.a.untyped.1dtype.u32.1 r0.x, g[r1.x], r0.x, 0 */
157 	INSTR_5XX(d7660204_02000a01, "(sy)stib.typed.2d.u32.1 g[1], r0.x, r0.z, r1.x"), /* (sy)stib.a.u32.2d.1 g[r1.x], r0.x, r0.z, 1.  r1.x is offset in ibo, r0.x is value*/
158 	/* dEQP-VK.image.load_store.1d_array.r8g8b8a8_unorm */
159 	INSTR_5XX(c1a20006_0600ba01, "ldib.typed.2d.f32.4 r1.z, g[0], r0.z, r1.z"), /* ldib.a.f32.2d.4 r1.z, g[r0.z], r1.z, 0.  r0.z is offset in ibo as src.  r1.z */
160 	/* dEQP-VK.image.load_store.3d.r32g32b32a32_sint */
161 	INSTR_5XX(c1aa0003_0500fc01, "ldib.typed.3d.s32.4 r0.w, g[0], r0.w, r1.y"), /* ldib.a.s32.3d.4 r0.w, g[r0.w], r1.y, 0.  r0.w is offset in ibo as src, and dst */
162 	/* dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_image.vertex.descriptor_array.3d */
163 	INSTR_5XX(c1a20204_0401fc01, "ldib.typed.3d.f32.4 r1.x, g[1], r1.w, r1.x"), /* ldib.a.f32.3d.4 r1.x, g[r1.w], r1.x, 1 */
164 	/* dEQP-VK.binding_model.shader_access.secondary_cmd_buf.with_push.storage_texel_buffer.vertex_fragment.single_descriptor.offset_zero */
165 	INSTR_5XX(c1a20005_0501be01, "ldib.typed.4d.f32.4 r1.y, g[0], r1.z, r1.y"), /* ldib.a.f32.1dtype.4 r1.y, g[r1.z], r1.y, 0 */
166 	/* dEQP-VK.texture.filtering.cube.formats.r8g8b8a8_snorm_nearest */
167 	INSTR_5XX(c1a60200_0000ba01, "ldib.typed.2d.u32.4 r0.x, g[1], r0.z, r0.x"), /* ldib.a.u32.2d.4 r0.x, g[r0.z], r0.x, 1 */
168 
169 	// TODO is this a real instruction?  Or float -6.0 ?
170 	// INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true),
171 	/* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */
172 	INSTR_6XX(c0d20906_02800004, "stg.a.f32 g[r1.x+(r1.z)<<2], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */
173 	INSTR_6XX(c0da052e_01800042, "stg.a.s32 g[r0.z+(r11.z)<<2], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */
174 	INSTR_6XX(c0ca0505_03800042, "stg.s32 g[r0.z+5], r8.y, 3"),
175 	INSTR_6XX(c0ca0500_03800042, "stg.s32 g[r0.z], r8.y, 3"),
176 	INSTR_6XX(c0ca0531_03800242, "stg.s32 g[r0.z+305], r8.y, 3"),
177 
178 	/* Custom-crafted */
179 	INSTR_6XX(c0d61104_01800228, "stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1"),
180 	INSTR_6XX(c0d61104_01802628, "stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1"),
181 
182 	INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
183 	INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
184 	INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
185 	INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
186 	INSTR_6XX(c0060003_0180c269, "ldg.u32 r0.w, g[r0.w+308], 1"),
187 
188 	/* Found in TCS/TES shaders of GTA V */
189 	INSTR_6XX(c0020007_03c1420f, "ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3"), /* ldg.a.f32 r1.w, g[r1.y+((r1.w+1)<<2)], 3 */
190 
191 	/* Custom-crafted */
192 	INSTR_6XX(c0020007_03c1740f, "ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3"),
193 
194 	INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
195 	INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
196 	INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
197 	INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
198 
199 	/* dEQP-GLES3.functional.ubo.random.basic_arrays.0 */
200 	INSTR_6XX(c7020020_01800000, "stc c[32], r0.x, 1", .parse_fail=true),
201 	/* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */
202 	INSTR_6XX(c7060020_03800000, "stc c[32], r0.x, 3", .parse_fail=true),
203 
204 	/* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */
205 	INSTR_6XX(c0260200_03676100, "stib.b.untyped.1d.u32.3.imm.base0 r0.x, r0.w, 1"), /* stib.untyped.u32.1d.3.mode4.base0 r0.x, r0.w, 1 */
206 
207 	INSTR_6XX(c0240402_00674100, "stib.b.untyped.1d.u16.1.imm.base0 r0.z, r0.x, 2"),
208 #if 0
209    /* TODO blob sometimes/frequently sets b0, although there does not seem
210     * to be an obvious pattern and our encoding never sets it.  AFAICT it
211     * is a dontcare bit
212     */
213    /* dEQP-VK.texture.filtering.cube.formats.a8b8g8r8_srgb_nearest_mipmap_nearest.txt */
214    INSTR_6XX(c0220200_0361b801, "ldib.b.typed.1d.f32.4.imm r0.x, r0.w, 1"), /* ldib.f32.1d.4.mode0.base0 r0.x, r0.w, 1 */
215 #else
216    /* dEQP-VK.texture.filtering.cube.formats.a8b8g8r8_srgb_nearest_mipmap_nearest.txt */
217    INSTR_6XX(c0220200_0361b800, "ldib.b.typed.1d.f32.4.imm r0.x, r0.w, 1"), /* ldib.f32.1d.4.mode0.base0 r0.x, r0.w, 1 */
218 #endif
219 
220    /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */
221    INSTR_6XX(c2c21100_04800006, "stlw.f32 l[r2.x], r0.w, 4"),
222    INSTR_6XX(c2c20f00_01800004, "stlw.f32 l[r1.w], r0.z, 1"),
223    INSTR_6XX(c2860003_02808011, "ldlw.u32 r0.w, l[r0.z+8], 2"),
224 
225    /* dEQP-VK.compute.basic.shared_var_single_group */
226    INSTR_6XX(c1060500_01800008, "stl.u32 l[r0.z], r1.x, 1"),
227    INSTR_6XX(c0460001_01804001, "ldl.u32 r0.y, l[r0.y], 1"),
228 
229    INSTR_6XX(c0860018_03820001, "ldp.u32 r6.x, p[r2.x], 3"),
230    INSTR_6XX(c0420002_01808019, "ldl.f32 r0.z, l[r0.z+12], 1"),
231    INSTR_6XX(c1021710_04800000, "stl.f32 l[r2.w+16], r0.x, 4"),
232    INSTR_6XX(d7c60011_03c00000, "(sy)ldlv.u32 r4.y, l[0], 3"),
233 
234    /* resinfo */
235    INSTR_6XX(c0260000_0063c200, "resinfo.b.untyped.2d.u32.1.imm r0.x, 0"), /* resinfo.u32.2d.mode0.base0 r0.x, 0 */
236    /* dEQP-GLES31.functional.image_load_store.buffer.image_size.writeonly_7.txt */
237    INSTR_6XX(c0260000_0063c000, "resinfo.b.untyped.1d.u32.1.imm r0.x, 0"), /* resinfo.u32.1d.mode0.base0 r0.x, 0 */
238    /* dEQP-VK.image.image_size.2d.readonly_12x34.txt */
239    INSTR_6XX(c0260000_0063c300, "resinfo.b.untyped.2d.u32.1.imm.base0 r0.x, 0"), /* resinfo.u32.2d.mode4.base0 r0.x, 0 */
240    /* Custom test */
241    INSTR_6XX(c0260000_0063c382, "resinfo.b.untyped.2d.u32.1.nonuniform.base1 r0.x, r0.x"), /* resinfo.u32.2d.mode6.base1 r0.x, r0.x */
242 
243    /* dEQP-GLES31.functional.image_load_store.2d.image_size.readonly_writeonly_32x32.txt */
244    INSTR_5XX(c3e60000_00000200, "resinfo.u32.2d r0.x, g[0]"), /* resinfo.u32.2d r0.x, 0 */
245 #if 0
246    /* TODO our encoding differs in b11 ('typed'), which seems to be a dontcare bit */
247    /* dEQP-GLES31.functional.image_load_store.buffer.image_size.readonly_writeonly_7 */
248    INSTR_5XX(c3e60000_00000e00, "resinfo.u32.4d r0.x, g[0]"), /* resinfo.u32.1dtype r0.x, 0 */
249    /* dEQP-GLES31.functional.image_load_store.3d.image_size.readonly_writeonly_12x34x56 */
250    INSTR_5XX(c3e60000_00000c00, "resinfo.u32.3d r0.x, g[0]"), /* resinfo.u32.3d r0.x, 0 */
251 #else
252    /* dEQP-GLES31.functional.image_load_store.buffer.image_size.readonly_writeonly_7 */
253    INSTR_5XX(c3e60000_00000600, "resinfo.u32.4d r0.x, g[0]"), /* resinfo.u32.1dtype r0.x, 0 */
254    /* dEQP-GLES31.functional.image_load_store.2d.image_size.readonly_writeonly_32x32.txt */
255    INSTR_5XX(c3e60000_00000400, "resinfo.u32.3d r0.x, g[0]"), /* resinfo.u32.3d r0.x, 0 */
256 #endif
257 
258    /* ldgb */
259    /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_vec4 */
260    INSTR_5XX(c6e20000_06003600, "ldgb.untyped.4d.f32.4 r0.x, g[0], r0.x, r1.z"), /* ldgb.a.untyped.1dtype.f32.4 r0.x, g[r0.x], r1.z, 0 */
261    /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_ivec4 */
262    INSTR_5XX(c6ea0000_06003600, "ldgb.untyped.4d.s32.4 r0.x, g[0], r0.x, r1.z"), /* ldgb.a.untyped.1dtype.s32.4 r0.x, g[r0.x], r1.z, 0 */
263    /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_float */
264    INSTR_5XX(c6e20000_02000600, "ldgb.untyped.4d.f32.1 r0.x, g[0], r0.x, r0.z"), /* ldgb.a.untyped.1dtype.f32.1 r0.x, g[r0.x], r0.z, 0 */
265    /* dEQP-GLES31.functional.ssbo.layout.random.vector_types.0 */
266    INSTR_5XX(c6ea0008_14002600, "ldgb.untyped.4d.s32.3 r2.x, g[0], r0.x, r5.x"), /* ldgb.a.untyped.1dtype.s32.3 r2.x, g[r0.x], r5.x, 0 */
267    INSTR_5XX(c6ea0204_1401a600, "ldgb.untyped.4d.s32.3 r1.x, g[1], r1.z, r5.x"), /* ldgb.a.untyped.1dtype.s32.3 r1.x, g[r1.z], r5.x, 1 */
268 
269    /* stgb */
270    INSTR_5XX(c7220028_0480000d, "stgb.untyped.1d.f32.1 g[0], r1.z, 4, r10.x"), /* stgb.untyped.1d.1 g[r10.x], r1.z, 4, r0.x */
271    INSTR_5XX(c7260023_02800009, "stgb.untyped.1d.u32.1 g[0], r1.x, 2, r8.w"),  /* stgb.untyped.1d.1 g[r8.w], r1.x, 2, r0.x */
272 
273    /* discard stuff */
274    INSTR_6XX(42b400f8_20010004, "cmps.s.eq p0.x, r1.x, 1"),
275    INSTR_6XX(02800000_00000000, "kill p0.x"),
276 
277    /* Immediates */
278    INSTR_6XX(40100007_68000008, "add.f r1.w, r2.x, (neg)(0.0)"),
279    INSTR_6XX(40100007_68010008, "add.f r1.w, r2.x, (neg)(0.5)"),
280    INSTR_6XX(40100007_68020008, "add.f r1.w, r2.x, (neg)(1.0)"),
281    INSTR_6XX(40100007_68030008, "add.f r1.w, r2.x, (neg)(2.0)"),
282    INSTR_6XX(40100007_68040008, "add.f r1.w, r2.x, (neg)(e)"),
283    INSTR_6XX(40100007_68050008, "add.f r1.w, r2.x, (neg)(pi)"),
284    INSTR_6XX(40100007_68060008, "add.f r1.w, r2.x, (neg)(1/pi)"),
285    INSTR_6XX(40100007_68070008, "add.f r1.w, r2.x, (neg)(1/log2(e))"),
286    INSTR_6XX(40100007_68080008, "add.f r1.w, r2.x, (neg)(log2(e))"),
287    INSTR_6XX(40100007_68090008, "add.f r1.w, r2.x, (neg)(1/log2(10))"),
288    INSTR_6XX(40100007_680a0008, "add.f r1.w, r2.x, (neg)(log2(10))"),
289    INSTR_6XX(40100007_680b0008, "add.f r1.w, r2.x, (neg)(4.0)"),
290 
291    /* LDC.  Our disasm differs greatly from qcom here, and we've got some
292     * important info they lack(?!), but same goes the other way.
293     */
294    /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.uniform_fragment */
295    INSTR_6XX(c0260000_00c78040, "ldc.offset0.1.uniform r0.x, 0, r0.x"), /* ldc.1.mode1.base0 r0.x, 0, r0.x */
296    INSTR_6XX(c0260201_00c78040, "ldc.offset0.1.uniform r0.y, 0, r0.y"), /* ldc.1.mode1.base0 r0.y, 0, r0.y */
297    /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.dynamically_uniform_fragment  */
298    INSTR_6XX(c0260000_00c78080, "ldc.offset0.1.nonuniform r0.x, 0, r0.x"), /* ldc.1.mode2.base0 r0.x, 0, r0.x */
299    INSTR_6XX(c0260201_00c78080, "ldc.offset0.1.nonuniform r0.y, 0, r0.y"), /* ldc.1.mode2.base0 r0.y, 0, r0.y */
300 
301    /* custom */
302    INSTR_6XX(c0260201_ffc78080, "ldc.offset0.1.nonuniform r0.y, 255, r0.y"), /* ldc.1.mode2.base0 r0.y, 255, r0.y */
303 
304    /* custom shaders, loading .x, .y, .z, .w from an array of vec4 in block 0 */
305    INSTR_6XX(c0260000_00478000, "ldc.offset0.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
306    INSTR_6XX(c0260000_00478200, "ldc.offset1.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
307    INSTR_6XX(c0260000_00478400, "ldc.offset2.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
308    INSTR_6XX(c0260000_00478600, "ldc.offset3.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
309 
310    /* dEQP-VK.glsl.struct.local.nested_struct_array_dynamic_index_fragment */
311    INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y-176], r0.y, 1"),
312    INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r2.z, 2"),
313    INSTR_6XX(c1465ba0_01803e2a, "stp.u32 p[r11.y-96], r5.y, 1"),
314    INSTR_6XX(c0860008_01860001, "ldp.u32 r2.x, p[r6.x], 1"),
315    /* Custom stp based on above to catch a disasm bug. */
316    INSTR_6XX(c1465b00_0180022a, "stp.u32 p[r11.y+256], r5.y, 1"),
317 
318    /* Atomic: */
319 #if 0
320    /* TODO our encoding differs in b53 for these two */
321    INSTR_5XX(c4d60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
322    INSTR_5XX(c4160205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
323 #else
324    INSTR_5XX(c4f60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
325    INSTR_5XX(c4360205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
326 #endif
327    INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], r0.w"),
328 
329    /* Bindless atomic: */
330    INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
331    INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
332    INSTR_6XX(c0360000_0365c800, "atomic.b.max.typed.1d.u32.1.imm r0.x, r0.w, 0"),   /* atomic.b.max.g.u32.1d.mode0.base0 r0.x,r0.w,0 */
333 
334    /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.fragment.sampler2d */
335    INSTR_6XX(a0c01f04_0cc00005, "sam (f32)(xyzw)r1.x, r0.z, s#6, t#6"),
336    /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.uniform.fragment.sampler2d (looks like maybe the compiler didn't figure out */
337    INSTR_6XX(a0c81f07_0100000b, "sam.s2en (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode0 (f32)(xyzw)r1.w, r1.y, hr2.x */
338    /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2d */
339    INSTR_6XX(a0c81f07_8100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x", .parse_fail=true), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */
340 
341    /* NonUniform: */
342    /* dEQP-VK.descriptor_indexing.storage_buffer */
343    INSTR_6XX(c0260c0a_0a61b180, "ldib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.z, r1.z"),
344    INSTR_6XX(d0260e0a_09677180, "(sy)stib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.y, r1.w"),
345    /* dEQP-VK.descriptor_indexing.uniform_texel_buffer */
346    INSTR_6XX(a0481f00_40000405, "isaml.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.z, r0.x"),
347    /* dEQP-VK.descriptor_indexing.storage_image */
348    INSTR_6XX(d0360c04_02640b80, "(sy)atomic.b.add.typed.2d.u32.1.nonuniform.base0 r1.x, r0.z, r1.z"),
349    /* dEQP-VK.descriptor_indexing.sampler */
350    INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"),
351 
352    /* Custom test since we've never seen the blob emit these. */
353    INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"),
354    INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"),
355 
356    /* cat7 */
357 
358    /* dEQP-VK.compute.basic.ssbo_local_barrier_single_invocation */
359    INSTR_6XX(e0fa0000_00000000, "fence.g.l.r.w"),
360    INSTR_6XX(e09a0000_00000000, "fence.r.w"),
361    INSTR_6XX(f0420000_00000000, "(sy)bar.g"),
362    /* clang-format on */
363 };
364 
/**
 * Strip every trailing newline from a NUL-terminated string, in place.
 *
 * Only '\n' characters at the very end of the string are removed; embedded
 * newlines and any other whitespace are left untouched.  An empty string is
 * left as-is.
 *
 * @param string  writable, NUL-terminated buffer to trim
 */
static void
trim(char *string)
{
   /* size_t, not int: strlen() returns size_t and len is used as an index. */
   size_t len = strlen(string);

   while (len > 0 && string[len - 1] == '\n')
      string[--len] = '\0';
}
371 
372 int
main(int argc,char ** argv)373 main(int argc, char **argv)
374 {
375    int retval = 0;
376    int decode_fails = 0, asm_fails = 0, encode_fails = 0;
377    const int output_size = 4096;
378    char *disasm_output = malloc(output_size);
379    FILE *fdisasm = fmemopen(disasm_output, output_size, "w+");
380    if (!fdisasm) {
381       fprintf(stderr, "failed to fmemopen\n");
382       return 1;
383    }
384 
385    struct ir3_compiler *compilers[10] = {};
386    struct fd_dev_id dev_ids[ARRAY_SIZE(compilers)];
387 
388    for (int i = 0; i < ARRAY_SIZE(tests); i++) {
389       const struct test *test = &tests[i];
390       printf("Testing a%d %s: \"%s\"...\n", test->gpu_id, test->instr,
391              test->expected);
392 
393       rewind(fdisasm);
394       memset(disasm_output, 0, output_size);
395 
396       /*
397        * Test disassembly:
398        */
399 
400       uint32_t code[2] = {
401          strtoll(&test->instr[9], NULL, 16),
402          strtoll(&test->instr[0], NULL, 16),
403       };
404       isa_decode(code, 8, fdisasm,
405                  &(struct isa_decode_options){
406                     .gpu_id = test->gpu_id,
407                     .show_errors = true,
408                  });
409       fflush(fdisasm);
410 
411       trim(disasm_output);
412 
413       if (strcmp(disasm_output, test->expected) != 0) {
414          printf("FAIL: disasm\n");
415          printf("  Expected: \"%s\"\n", test->expected);
416          printf("  Got:      \"%s\"\n", disasm_output);
417          retval = 1;
418          decode_fails++;
419          continue;
420       }
421 
422       /*
423        * Test assembly, which should result in the identical binary:
424        */
425 
426       unsigned gen = test->gpu_id / 100;
427       if (!compilers[gen]) {
428          dev_ids[gen].gpu_id = test->gpu_id;
429          compilers[gen] = ir3_compiler_create(NULL, &dev_ids[gen], false);
430       }
431 
432       FILE *fasm =
433          fmemopen((void *)test->expected, strlen(test->expected), "r");
434 
435       struct ir3_kernel_info info = {};
436       struct ir3_shader *shader = ir3_parse_asm(compilers[gen], &info, fasm);
437       fclose(fasm);
438       if (!shader) {
439          printf("FAIL: %sexpected assembler fail\n",
440                 test->parse_fail ? "" : "un");
441          asm_fails++;
442          /* If this is an instruction that the asm parser is not expected
443           * to handle, don't count it as a fail.
444           */
445          if (!test->parse_fail)
446             retval = 1;
447          continue;
448       } else if (test->parse_fail) {
449          /* If asm parse starts passing, and we don't expect that, flag
450           * it as a fail so we don't forget to update the test vector:
451           */
452          printf(
453             "FAIL: unexpected parse success, please remove '.parse_fail=true'\n");
454          retval = 1;
455       }
456 
457       struct ir3_shader_variant *v = shader->variants;
458       if (memcmp(v->bin, code, sizeof(code))) {
459          printf("FAIL: assembler\n");
460          printf("  Expected: %08x_%08x\n", code[1], code[0]);
461          printf("  Got:      %08x_%08x\n", v->bin[1], v->bin[0]);
462          retval = 1;
463          encode_fails++;
464       }
465 
466       ir3_shader_destroy(shader);
467    }
468 
469    if (decode_fails)
470       printf("%d/%d decode fails\n", decode_fails, (int)ARRAY_SIZE(tests));
471    if (asm_fails)
472       printf("%d/%d assembler fails\n", asm_fails, (int)ARRAY_SIZE(tests));
473    if (encode_fails)
474       printf("%d/%d encode fails\n", encode_fails, (int)ARRAY_SIZE(tests));
475 
476    if (retval) {
477       printf("FAILED!\n");
478    } else {
479       printf("PASSED!\n");
480    }
481 
482    for (unsigned i = 0; i < ARRAY_SIZE(compilers); i++) {
483       if (!compilers[i])
484          continue;
485       ir3_compiler_destroy(compilers[i]);
486    }
487 
488    fclose(fdisasm);
489    free(disasm_output);
490 
491    return retval;
492 }
493