1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 #include "helpers.h"
25 
26 using namespace aco;
27 
28 BEGIN_TEST(assembler.s_memtime)
29    for (unsigned i = GFX6; i <= GFX10; i++) {
30       if (!setup_cs(NULL, (chip_class)i))
31          continue;
32 
33       //~gfx[6-7]>> c7800000
34       //~gfx[6-7]!  bf810000
35       //~gfx[8-9]>> s_memtime s[0:1] ; c0900000 00000000
36       //~gfx10>> s_memtime s[0:1] ; f4900000 fa000000
37       bld.smem(aco_opcode::s_memtime, bld.def(s2)).def(0).setFixed(PhysReg{0});
38 
39       finish_assembler_test();
40    }
41 END_TEST
42 
43 BEGIN_TEST(assembler.branch_3f)
44    if (!setup_cs(NULL, (chip_class)GFX10))
45       return;
46 
47    //! BB0:
48    //! s_branch BB1                                                ; bf820040
49    //! s_nop 0                                                     ; bf800000
50    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1);
51 
52    for (unsigned i = 0; i < 0x3f; i++)
53       bld.vop1(aco_opcode::v_nop);
54 
55    bld.reset(program->create_and_insert_block());
56 
57    program->blocks[1].linear_preds.push_back(0u);
58 
59    finish_assembler_test();
60 END_TEST
61 
62 BEGIN_TEST(assembler.long_jump.unconditional_forwards)
63    if (!setup_cs(NULL, (chip_class)GFX10))
64       return;
65 
66    //!BB0:
67    //! s_getpc_b64 s[0:1]                                          ; be801f00
68    //! s_addc_u32 s0, s0, 0x20018                                  ; 8200ff00 00020018
69    //! s_addc_u32 s1, s1, 0                                        ; 82018001
70    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
71    //! s_bitset0_b32 s0, 0                                         ; be801b80
72    //! s_setpc_b64 s[0:1]                                          ; be802000
73    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
74 
75    bld.reset(program->create_and_insert_block());
76 
77    //! s_nop 0                                                     ; bf800000
78    //!(then repeated 32767 times)
79    for (unsigned i = 0; i < INT16_MAX + 1; i++)
80       bld.sopp(aco_opcode::s_nop, -1, 0);
81 
82    //! BB2:
83    //! s_endpgm                                                    ; bf810000
84    bld.reset(program->create_and_insert_block());
85 
86    program->blocks[2].linear_preds.push_back(0u);
87    program->blocks[2].linear_preds.push_back(1u);
88 
89    finish_assembler_test();
90 END_TEST
91 
92 BEGIN_TEST(assembler.long_jump.conditional_forwards)
93    if (!setup_cs(NULL, (chip_class)GFX10))
94       return;
95 
96    //! BB0:
97    //! s_cbranch_scc1 BB1                                          ; bf850007
98    //! s_getpc_b64 s[0:1]                                          ; be801f00
99    //! s_addc_u32 s0, s0, 0x20018                                  ; 8200ff00 00020018
100    //! s_addc_u32 s1, s1, 0                                        ; 82018001
101    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
102    //! s_bitset0_b32 s0, 0                                         ; be801b80
103    //! s_setpc_b64 s[0:1]                                          ; be802000
104    bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2);
105 
106    bld.reset(program->create_and_insert_block());
107 
108    //! BB1:
109    //! s_nop 0 ; bf800000
110    //!(then repeated 32767 times)
111    for (unsigned i = 0; i < INT16_MAX + 1; i++)
112       bld.sopp(aco_opcode::s_nop, -1, 0);
113 
114    //! BB2:
115    //! s_endpgm                                                    ; bf810000
116    bld.reset(program->create_and_insert_block());
117 
118    program->blocks[1].linear_preds.push_back(0u);
119    program->blocks[2].linear_preds.push_back(0u);
120    program->blocks[2].linear_preds.push_back(1u);
121 
122    finish_assembler_test();
123 END_TEST
124 
125 BEGIN_TEST(assembler.long_jump.unconditional_backwards)
126    if (!setup_cs(NULL, (chip_class)GFX10))
127       return;
128 
129    //!BB0:
130    //! s_nop 0                                                     ; bf800000
131    //!(then repeated 32767 times)
132    for (unsigned i = 0; i < INT16_MAX + 1; i++)
133       bld.sopp(aco_opcode::s_nop, -1, 0);
134 
135    //! s_getpc_b64 s[0:1]                                          ; be801f00
136    //! s_addc_u32 s0, s0, 0xfffdfffc                               ; 8200ff00 fffdfffc
137    //! s_addc_u32 s1, s1, -1                                       ; 8201c101
138    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
139    //! s_bitset0_b32 s0, 0                                         ; be801b80
140    //! s_setpc_b64 s[0:1]                                          ; be802000
141    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 0);
142 
143    //! BB1:
144    //! s_endpgm                                                    ; bf810000
145    bld.reset(program->create_and_insert_block());
146 
147    program->blocks[0].linear_preds.push_back(0u);
148    program->blocks[1].linear_preds.push_back(0u);
149 
150    finish_assembler_test();
151 END_TEST
152 
153 BEGIN_TEST(assembler.long_jump.conditional_backwards)
154    if (!setup_cs(NULL, (chip_class)GFX10))
155       return;
156 
157    //!BB0:
158    //! s_nop 0                                                     ; bf800000
159    //!(then repeated 32767 times)
160    for (unsigned i = 0; i < INT16_MAX + 1; i++)
161       bld.sopp(aco_opcode::s_nop, -1, 0);
162 
163    //! s_cbranch_execz BB1                                         ; bf880007
164    //! s_getpc_b64 s[0:1]                                          ; be801f00
165    //! s_addc_u32 s0, s0, 0xfffdfff8                               ; 8200ff00 fffdfff8
166    //! s_addc_u32 s1, s1, -1                                       ; 8201c101
167    //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
168    //! s_bitset0_b32 s0, 0                                         ; be801b80
169    //! s_setpc_b64 s[0:1]                                          ; be802000
170    bld.sopp(aco_opcode::s_cbranch_execnz, Definition(PhysReg(0), s2), 0);
171 
172    //! BB1:
173    //! s_endpgm                                                    ; bf810000
174    bld.reset(program->create_and_insert_block());
175 
176    program->blocks[0].linear_preds.push_back(0u);
177    program->blocks[1].linear_preds.push_back(0u);
178 
179    finish_assembler_test();
180 END_TEST
181 
182 BEGIN_TEST(assembler.long_jump.3f)
183    if (!setup_cs(NULL, (chip_class)GFX10))
184       return;
185 
186    //! BB0:
187    //! s_branch BB1                                                ; bf820040
188    //! s_nop 0                                                     ; bf800000
189    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1);
190 
191    for (unsigned i = 0; i < 0x3f - 7; i++) // a unconditional long jump is 7 dwords
192       bld.vop1(aco_opcode::v_nop);
193    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
194 
195    bld.reset(program->create_and_insert_block());
196    for (unsigned i = 0; i < INT16_MAX + 1; i++)
197       bld.vop1(aco_opcode::v_nop);
198    bld.reset(program->create_and_insert_block());
199 
200    program->blocks[1].linear_preds.push_back(0u);
201    program->blocks[2].linear_preds.push_back(0u);
202    program->blocks[2].linear_preds.push_back(1u);
203 
204    finish_assembler_test();
205 END_TEST
206 
207 BEGIN_TEST(assembler.long_jump.constaddr)
208    if (!setup_cs(NULL, (chip_class)GFX10))
209       return;
210 
211    //>> s_getpc_b64 s[0:1]                                          ; be801f00
212    bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
213 
214    bld.reset(program->create_and_insert_block());
215 
216    for (unsigned i = 0; i < INT16_MAX + 1; i++)
217       bld.sopp(aco_opcode::s_nop, -1, 0);
218 
219    bld.reset(program->create_and_insert_block());
220 
221    //>> s_getpc_b64 s[0:1]                                          ; be801f00
222    //! s_add_u32 s0, s0, 0xe0                                      ; 8000ff00 000000e0
223    bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand::zero());
224    bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc),
225             Operand(PhysReg(0), s1), Operand::zero());
226 
227    program->blocks[2].linear_preds.push_back(0u);
228    program->blocks[2].linear_preds.push_back(1u);
229 
230    finish_assembler_test();
231 END_TEST
232 
233 BEGIN_TEST(assembler.v_add3)
234    for (unsigned i = GFX9; i <= GFX10; i++) {
235       if (!setup_cs(NULL, (chip_class)i))
236          continue;
237 
238       //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
239       //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
240       aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
241       add3->operands[0] = Operand::zero();
242       add3->operands[1] = Operand::zero();
243       add3->operands[2] = Operand::zero();
244       add3->definitions[0] = Definition(PhysReg(0), v1);
245       bld.insert(std::move(add3));
246 
247       finish_assembler_test();
248    }
249 END_TEST
250 
251 BEGIN_TEST(assembler.v_add3_clamp)
252    for (unsigned i = GFX9; i <= GFX10; i++) {
253       if (!setup_cs(NULL, (chip_class)i))
254          continue;
255 
256       //~gfx9>> integer addition + clamp ; d1ff8000 02010080
257       //~gfx10>> integer addition + clamp ; d76d8000 02010080
258       aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
259       add3->operands[0] = Operand::zero();
260       add3->operands[1] = Operand::zero();
261       add3->operands[2] = Operand::zero();
262       add3->definitions[0] = Definition(PhysReg(0), v1);
263       add3->clamp = 1;
264       bld.insert(std::move(add3));
265 
266       finish_assembler_test();
267    }
268 END_TEST
269