1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 #include "helpers.h"
25 
/* Make the names of namespace aco usable unqualified for the rest of this file. */
26 using namespace aco;
27 
/*
 * to_hw_instr.swap_subdword
 *
 * Builds p_parallelcopy, p_create_vector and p_split_vector pseudo
 * instructions whose definitions/operands use sub-dword register classes
 * (v1b, v2b, v3b, v6b) next to v1/v2, once for GFX6..GFX7 and once for
 * GFX8..GFX9, and finishes each program with finish_to_hw_instr_test().
 *
 * Every case starts with a p_unit_test marker carrying the case number.
 * The comment lines of the form `//>> ...`, `//! ...` and `//~<regex>! ...`
 * placed before a case look like the expected instruction patterns for it
 * and are presumably consumed by the test harness.
 * NOTE(review): confirm against the harness; keep those lines byte-exact
 * and do not insert `//`-style remarks between them.
 */
28 BEGIN_TEST(to_hw_instr.swap_subdword)
      /* PhysReg 256..259 are printed as v[0]..v[3] in the expectations below. */
29    PhysReg v0_lo{256};
30    PhysReg v0_hi{256};
31    PhysReg v0_b1{256};
32    PhysReg v0_b3{256};
33    PhysReg v1_lo{257};
34    PhysReg v1_hi{257};
35    PhysReg v1_b1{257};
36    PhysReg v1_b3{257};
37    PhysReg v2_lo{258};
38    PhysReg v3_lo{259};
      /* reg_b offsets move the start inside the register: +1, +2 and +3 show
       * up as bit offsets 8, 16 and 24 in the printed operands below. */
39    v0_hi.reg_b += 2;
40    v1_hi.reg_b += 2;
41    v0_b1.reg_b += 1;
42    v1_b1.reg_b += 1;
43    v0_b3.reg_b += 3;
44    v1_b3.reg_b += 3;
45 
      /* GFX6/GFX7: the expectations contain no dst_sel/src_sel selectors;
       * swaps are three v_xor_b32 and packing uses shifts + v_alignbyte_b32. */
46    for (unsigned i = GFX6; i <= GFX7; i++) {
         /* A false return from setup_cs() skips this chip. */
47       if (!setup_cs(NULL, (chip_class)i))
48          continue;
49 
50       //~gfx[67]>>  p_unit_test 0
51       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
52       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
53       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
54       bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
55       bld.pseudo(aco_opcode::p_parallelcopy,
56                  Definition(v0_lo, v2b), Definition(v1_lo, v2b),
57                  Operand(v1_lo, v2b), Operand(v0_lo, v2b));
58 
59       //~gfx[67]! p_unit_test 1
60       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
61       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
62       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
63       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
64       bld.pseudo(aco_opcode::p_create_vector,
65                  Definition(v0_lo, v1),
66                  Operand(v1_lo, v2b), Operand(v0_lo, v2b));
67 
68       //~gfx[67]! p_unit_test 2
69       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
70       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
71       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
72       //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
73       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
74       bld.pseudo(aco_opcode::p_create_vector,
75                  Definition(v0_lo, v6b), Operand(v1_lo, v2b),
76                  Operand(v0_lo, v2b), Operand(v2_lo, v2b));
77 
78       //~gfx[67]! p_unit_test 3
79       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
80       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
81       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
82       //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
83       //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
84       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
85       bld.pseudo(aco_opcode::p_create_vector,
86                  Definition(v0_lo, v2),
87                  Operand(v1_lo, v2b), Operand(v0_lo, v2b),
88                  Operand(v2_lo, v2b), Operand(v3_lo, v2b));
89 
90       //~gfx[67]! p_unit_test 4
91       //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
92       //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[2][0:16], %0:v[1][16:32], 2
93       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
94       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:16], %0:v[0][16:32], 2
95       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
96       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
97       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
98       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
99       bld.pseudo(aco_opcode::p_create_vector,
100                  Definition(v0_lo, v2),
101                  Operand(v1_lo, v2b), Operand(v2_lo, v2b),
102                  Operand(v0_lo, v2b), Operand(v3_lo, v2b));
103 
104       //~gfx[67]! p_unit_test 5
105       //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
106       //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
107       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
108       bld.pseudo(aco_opcode::p_split_vector,
109                  Definition(v1_lo, v2b), Definition(v0_lo, v2b),
110                  Operand(v0_lo, v1));
111 
112       //~gfx[67]! p_unit_test 6
113       //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
114       //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
115       //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
116       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
117       bld.pseudo(aco_opcode::p_split_vector,
118                  Definition(v1_lo, v2b), Definition(v0_lo, v2b),
119                  Definition(v2_lo, v2b), Operand(v0_lo, v6b));
120 
121       //~gfx[67]! p_unit_test 7
122       //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
123       //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
124       //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
125       //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
126       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
127       bld.pseudo(aco_opcode::p_split_vector,
128                  Definition(v1_lo, v2b), Definition(v0_lo, v2b),
129                  Definition(v2_lo, v2b), Definition(v3_lo, v2b),
130                  Operand(v0_lo, v2));
131 
132       //~gfx[67]! p_unit_test 8
133       //~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]
134       //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
135       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
136       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
137       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
138       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
139       bld.pseudo(aco_opcode::p_split_vector,
140                  Definition(v1_lo, v2b), Definition(v2_lo, v2b),
141                  Definition(v0_lo, v2b), Definition(v3_lo, v2b),
142                  Operand(v0_lo, v2));
143 
144       //~gfx[67]! p_unit_test 9
145       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
146       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
147       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
148       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
149       bld.pseudo(aco_opcode::p_parallelcopy,
150                  Definition(v0_lo, v1b), Definition(v1_lo, v1b),
151                  Operand(v1_lo, v1b), Operand(v0_lo, v1b));
152 
153       //~gfx[67]! p_unit_test 10
154       //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
155       //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
156       //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
157       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
158       bld.pseudo(aco_opcode::p_create_vector,
159                  Definition(v0_lo, v2b),
160                  Operand(v1_lo, v1b), Operand(v0_lo, v1b));
161 
162       //~gfx[67]! p_unit_test 11
163       //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
164       //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
165       //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
166       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
167       //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
168       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
169       bld.pseudo(aco_opcode::p_create_vector,
170                  Definition(v0_lo, v3b), Operand(v1_lo, v1b),
171                  Operand(v0_lo, v1b), Operand(v2_lo, v1b));
172 
173       //~gfx[67]! p_unit_test 12
174       //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
175       //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
176       //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
177       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
178       //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
179       //~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
180       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
181       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
182       bld.pseudo(aco_opcode::p_create_vector,
183                  Definition(v0_lo, v1),
184                  Operand(v1_lo, v1b), Operand(v0_lo, v1b),
185                  Operand(v2_lo, v1b), Operand(v3_lo, v1b));
186 
187       //~gfx[67]! p_unit_test 13
188       //~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]
189       //~gfx[67]! v2b: %0:v[0][0:16] = v_mul_u32_u24 0x101, %0:v[0][0:8]
190       //~gfx[67]! v2b: %0:v[0][0:16] = v_and_b32 0xffff, %0:v[0][0:16]
191       //~gfx[67]! v3b: %0:v[0][0:24] = v_cvt_pk_u16_u32 %0:v[0][0:16], %0:v[0][0:8]
192       //~gfx[67]! v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
193       //~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
194       //~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
195       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
          /* Case 13 is the only one that keeps the returned instruction: it sets
           * scratch_sgpr to m0, and the expectation above loads a constant into
           * m0 before the final v_mul_lo_u32. */
196       Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector,
197                                        Definition(v0_lo, v1),
198                                        Operand(v0_lo, v1b), Operand(v0_lo, v1b),
199                                        Operand(v0_lo, v1b), Operand(v0_lo, v1b));
200       pseudo->pseudo().scratch_sgpr = m0;
201 
202       //~gfx[67]! p_unit_test 14
203       //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
204       //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
205       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
206       bld.pseudo(aco_opcode::p_split_vector,
207                  Definition(v1_lo, v1b), Definition(v0_lo, v1b),
208                  Operand(v0_lo, v2b));
209 
210       //~gfx[67]! p_unit_test 15
211       //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
212       //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
213       //~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
214       //~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
215       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
216       bld.pseudo(aco_opcode::p_split_vector,
217                  Definition(v1_lo, v1b), Definition(v0_lo, v1b),
218                  Definition(v2_lo, v1b), Definition(v3_lo, v1b),
219                  Operand(v0_lo, v1));
220 
221       //~gfx[67]! s_endpgm
222 
223       finish_to_hw_instr_test();
224    }
225 
       /* GFX8/GFX9: the expectations use dst_sel/src0_sel/src1_sel selectors for
        * sub-dword moves; where GFX8 expects three v_xor_b32 for a full-dword
        * swap, GFX9 expects a single v_swap_b32. */
226    for (unsigned i = GFX8; i <= GFX9; i++) {
227       if (!setup_cs(NULL, (chip_class)i))
228          continue;
229 
230       //~gfx[89]>> p_unit_test 0
231       //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
232       //~gfx9! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
233       bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
234       bld.pseudo(aco_opcode::p_parallelcopy,
235                  Definition(v0_lo, v2b), Definition(v0_hi, v2b),
236                  Operand(v0_hi, v2b), Operand(v0_lo, v2b));
237 
238       //~gfx[89]! p_unit_test 1
239       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
240       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
241       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
242       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
243       //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
244       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
245       bld.pseudo(aco_opcode::p_parallelcopy,
246                  Definition(v0_lo, v1), Definition(v1_lo, v2b),
247                  Operand(v1_lo, v1), Operand(v0_lo, v2b));
248 
249       //~gfx[89]! p_unit_test 2
250       //~gfx[89]! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
251       //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][0:16] dst_sel:uword1 dst_preserve src0_sel:uword0
252       //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
253       //~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
254       //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
255       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
256       bld.pseudo(aco_opcode::p_parallelcopy,
257                  Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b),
258                  Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b));
259 
260       //~gfx[89]! p_unit_test 3
261       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
262       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
263       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
264       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
265       //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0
266       //~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2
267       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
268       bld.pseudo(aco_opcode::p_parallelcopy,
269                  Definition(v0_lo, v1), Definition(v1_b3, v1b),
270                  Operand(v1_lo, v1), Operand(v0_b3, v1b));
271 
272       //~gfx[89]! p_unit_test 4
273       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
274       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
275       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
276       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
277       //~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
278       //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
279       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
280       bld.pseudo(aco_opcode::p_parallelcopy,
281                  Definition(v0_lo, v1), Definition(v1_lo, v1b),
282                  Operand(v1_lo, v1), Operand(v0_lo, v1b));
283 
284       //~gfx[89]! p_unit_test 5
285       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
286       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
287       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
288       //~gfx9! v1: %0:v[1],  v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
289       //~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
290       //~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
291       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
292       bld.pseudo(aco_opcode::p_parallelcopy,
293                  Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),
294                  Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1));
295 
296       //~gfx[89]! p_unit_test 6
297       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
298       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
299       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
300       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
301       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
302       bld.pseudo(aco_opcode::p_parallelcopy,
303                  Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
304                  Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1));
305 
306       //~gfx[89]! p_unit_test 7
307       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
308       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
309       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
310       //~gfx9! v1: %0:v[1],  v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
311       //~gfx[89]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
312       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
313       bld.pseudo(aco_opcode::p_parallelcopy,
314                  Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
315                  Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1));
316 
317       //~gfx[89]! p_unit_test 8
318       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
319       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
320       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
321       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
322       //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
323       //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
324       //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
325       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
326       bld.pseudo(aco_opcode::p_parallelcopy,
327                  Definition(v0_lo, v3b), Definition(v1_lo, v3b),
328                  Operand(v1_lo, v3b), Operand(v0_lo, v3b));
329 
330       //~gfx[89]! p_unit_test 9
331       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
332       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
333       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
334       //~gfx9! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
335       //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
336       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
337       bld.pseudo(aco_opcode::p_parallelcopy,
338                  Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),
339                  Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b));
340 
341       //~gfx[89]! p_unit_test 10
342       //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
343       //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
344       //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
345       //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
346       //~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
347       //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
348       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
349       bld.pseudo(aco_opcode::p_parallelcopy,
350                  Definition(v0_b1, v2b), Definition(v1_b1, v2b),
351                  Operand(v1_b1, v2b), Operand(v0_b1, v2b));
352 
353       //~gfx[89]! p_unit_test 11
354       //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_sel:uword0 dst_preserve src0_sel:uword1
355       //~gfx[89]! v1: %0:v[0] = v_mov_b32 42
356       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
357       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
358                  Operand::c32(42u), Operand(v0_hi, v2b));
359 
360       //~gfx[89]! s_endpgm
361 
362       finish_to_hw_instr_test();
363    }
364 END_TEST
365 
/*
 * to_hw_instr.subdword_constant
 *
 * Builds p_parallelcopy pseudo instructions whose sources include constant
 * operands (Operand::c16, Operand::c8, Operand::c32, Operand::zero) and
 * whose definitions are v2b/v1b/v1 registers, for GFX9..GFX10.
 *
 * The `//>>` / `//!` comment lines carry no chip filter, while `//~gfx9!`
 * and `//~gfx10!` lines differ per chip; they look like expected-output
 * patterns presumably consumed by the test harness.
 * NOTE(review): confirm against the harness and keep them byte-exact.
 */
366 BEGIN_TEST(to_hw_instr.subdword_constant)
       /* 256/257 are printed as v[0]/v[1] below; reg_b += 2 / += 1 start the
        * operand at bit 16 / bit 8 of the register in the printed form. */
367    PhysReg v0_lo{256};
368    PhysReg v0_hi{256};
369    PhysReg v0_b1{256};
370    PhysReg v1_lo{257};
371    PhysReg v1_hi{257};
372    v0_hi.reg_b += 2;
373    v0_b1.reg_b += 1;
374    v1_hi.reg_b += 2;
375 
376    for (unsigned i = GFX9; i <= GFX10; i++) {
          /* A false return from setup_cs() skips this chip. */
377       if (!setup_cs(NULL, (chip_class)i))
378          continue;
379 
380       /* 16-bit pack */
          /* In the expectation for case 0 the constant 0x3800 is printed as 0.5. */
381       //>> p_unit_test 0
382       //! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32])
383       bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
384       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
385                  Operand::c16(0x3800), Operand(v1_hi, v2b));
386 
387       //! p_unit_test 1
388       //~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32]
389       //~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0]
390       //~gfx10! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32])
391       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
392       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
393                  Operand::c16(0x4205), Operand(v1_hi, v2b));
394 
395       //! p_unit_test 2
396       //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
397       //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
398       //~gfx10! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16]
399       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
400       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
401                  Operand::c16(0x4205), Operand(v0_lo, v2b));
402 
          /* Cases 3-5: both halves are constants; each expectation is a single
           * v_mov_b32 of the combined 32-bit value. */
403       //! p_unit_test 3
404       //! v1: %_:v[0] = v_mov_b32 0x3c003800
405       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
406       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
407                  Operand::c16(0x3800), Operand::c16(0x3c00));
408 
409       //! p_unit_test 4
410       //! v1: %_:v[0] = v_mov_b32 0x43064205
411       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
412       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
413                  Operand::c16(0x4205), Operand::c16(0x4306));
414 
415       //! p_unit_test 5
416       //! v1: %_:v[0] = v_mov_b32 0x38004205
417       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
418       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
419                  Operand::c16(0x4205), Operand::c16(0x3800));
420 
421       /* 16-bit copy */
422       //! p_unit_test 6
423       //! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:dword
424       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
425       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x3800));
426 
427       //! p_unit_test 7
428       //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
429       //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
430       //~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])
431       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
432       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x4205));
433 
434       //! p_unit_test 8
435       //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
436       //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0]
437       //~gfx10! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205
438       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
439       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0x4205));
440 
          /* Cases 9-10: the 16-bit definition starts at byte 1 (v0_b1); the
           * expectations write it as two separate byte-sized instructions. */
441       //! p_unit_test 9
442       //! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_sel:ubyte1 dst_preserve src0_sel:dword
443       //! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_sel:ubyte2 dst_preserve src0_sel:dword
444       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
445       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x3800));
446 
447       //! p_unit_test 10
448       //! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_sel:ubyte1 dst_preserve src0_sel:dword
449       //! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_sel:ubyte2 dst_preserve src0_sel:dword src1_sel:dword
450       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
451       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x4205));
452 
453       /* 8-bit copy */
454       //! p_unit_test 11
455       //! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_sel:ubyte0 dst_preserve src0_sel:dword src1_sel:dword
456       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
457       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42));
458 
459       /* 32-bit and 8-bit copy */
460       //! p_unit_test 12
461       //! v1: %_:v[0] = v_mov_b32 0
462       //! v1b: %_:v[1][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
463       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
464       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
465                  Operand::zero(), Operand::zero(1));
466 
467       //! s_endpgm
468 
469       finish_to_hw_instr_test();
470    }
471 END_TEST
472 
/*
 * to_hw_instr.self_intersecting_swap
 *
 * GFX9-only case: a single p_parallelcopy in which a two-dword copy
 * (v[1:2] <- v[2:3]) overlaps its own source and is combined with
 * v3 <- v7 and v7 <- v1. The pattern comments expect three v_swap_b32
 * instructions followed by s_endpgm.
 * NOTE(review): the `//>>` / `//!` lines are presumably read by the test
 * harness -- keep them byte-exact.
 */
473 BEGIN_TEST(to_hw_instr.self_intersecting_swap)
       /* Nothing is emitted when setup_cs() returns false. */
474    if (!setup_cs(NULL, GFX9))
475       return;
476 
       /* 257, 258, 259 and 263 are printed as v[1], v[2], v[3] and v[7] below. */
477    PhysReg reg_v1{257};
478    PhysReg reg_v2{258};
479    PhysReg reg_v3{259};
480    PhysReg reg_v7{263};
481 
482    //>> p_unit_test 0
483    //! v1: %0:v[1],  v1: %0:v[2] = v_swap_b32 %0:v[2], %0:v[1]
484    //! v1: %0:v[2],  v1: %0:v[3] = v_swap_b32 %0:v[3], %0:v[2]
485    //! v1: %0:v[3],  v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]
486    //! s_endpgm
487    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
488    //v[1:2] = v[2:3]
489    //v3 = v7
490    //v7 = v1
491    bld.pseudo(aco_opcode::p_parallelcopy,
492               Definition(reg_v1, v2), Definition(reg_v3, v1), Definition(reg_v7, v1),
493               Operand(reg_v2, v2), Operand(reg_v7, v1), Operand(reg_v1, v1));
494 
495    finish_to_hw_instr_test();
496 END_TEST
497 
/*
 * to_hw_instr.extract
 *
 * Builds p_extract pseudo instructions for GFX7..GFX9, each chip once with
 * is_signed = 0 (variant suffix "_unsigned") and once with is_signed = 1
 * (variant suffix "_signed"). Three groups are emitted, each through its
 * own local EXT macro: 32-bit VGPR, SGPR (with an scc definition), and
 * 16-bit VGPR.
 *
 * The `//;` lines hold Python lambdas that pick a signed or unsigned
 * mnemonic from the variant suffix; `@name` in the pattern lines
 * presumably refers to them.
 * NOTE(review): these comment lines look like harness input -- confirm and
 * keep them byte-exact.
 */
498 BEGIN_TEST(to_hw_instr.extract)
       /* 0/1 are printed as s[0]/s[1] and 256/257 as v[0]/v[1] below. */
499    PhysReg s0_lo{0};
500    PhysReg s1_lo{1};
501    PhysReg v0_lo{256};
502    PhysReg v1_lo{257};
503 
504    for (unsigned i = GFX7; i <= GFX9; i++) {
505    for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
          /* A false return from setup_cs() skips this chip/signedness pair. */
506       if (!setup_cs(NULL, (chip_class)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
507          continue;
508 
          /* Group 1: extract field `idx` of `size` bits from v1 into v0 (both v1). */
509 #define EXT(idx, size)                                                                             \
510    bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
511               Operand::c32(size), Operand::c32(is_signed));
512 
513       //; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
514       //; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
515       //; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
516       //; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
517       //; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n)
518 
519       //>> p_unit_test 0
520       bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
521       //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
522       EXT(0, 8)
523       //! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
524       EXT(1, 8)
525       //! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
526       EXT(2, 8)
527       //! v1: %_:v[0] = @v_shr 24, %_:v[1]
528       EXT(3, 8)
529       //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
530       EXT(0, 16)
531       //! v1: %_:v[0] = @v_shr 16, %_:v[1]
532       EXT(1, 16)
533 
534       #undef EXT
535 
          /* Group 2: same extraction on SGPRs (s1 -> s0) with an extra scc definition. */
536 #define EXT(idx, size)                                                                             \
537    bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1),                   \
538               Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed));
539 
540       //>> p_unit_test 2
541       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
542       //~gfx._unsigned! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80000
543       //~gfx._signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
544       EXT(0, 8)
545       //! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80008
546       EXT(1, 8)
547       //! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80010
548       EXT(2, 8)
549       //! s1: %_:s[0],  s1: %_:scc = @s_shr %_:s[1], 24
550       EXT(3, 8)
551       //~gfx._unsigned! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x100000
552       //~gfx._signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
553       EXT(0, 16)
554       //! s1: %_:s[0],  s1: %_:scc = @s_shr %_:s[1], 16
555       EXT(1, 16)
556 
557       #undef EXT
558 
          /* Group 3: 8-bit extraction with v2b operands; src_b advances the
           * source register by that many bytes. */
559 #define EXT(idx, src_b)                                                                            \
560    bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b),   \
561               Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed));
562 
563       //>> p_unit_test 4
564       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
565       //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
566       //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
567       EXT(0, 0)
          /* The src_b = 2 cases are emitted only when i != GFX7; accordingly
           * there is no gfx7 pattern line for them. */
568       //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
569       if (i != GFX7)
570          EXT(0, 2)
571       //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
572       //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
573       EXT(1, 0)
574       //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
575       if (i != GFX7)
576          EXT(1, 2)
577 
578       #undef EXT
579 
580       finish_to_hw_instr_test();
581 
582       //! s_endpgm
583    }
584    }
585 END_TEST
586 
587 BEGIN_TEST(to_hw_instr.insert)
588    PhysReg s0_lo{0};
589    PhysReg s1_lo{1};
590    PhysReg v0_lo{256};
591    PhysReg v1_lo{257};
592 
593    for (unsigned i = GFX7; i <= GFX9; i++) {
594       if (!setup_cs(NULL, (chip_class)i))
595          continue;
596 
597 #define INS(idx, size)                                                                             \
598    bld.pseudo(aco_opcode::p_insert, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx),  \
599               Operand::c32(size));
600 
601       //>> p_unit_test 0
602       bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
603       //! v1: %_:v[0] = v_bfe_u32 %_:v[1], 0, 8
604       INS(0, 8)
605       //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
606       //~gfx7! v1: %0:v[0] = v_lshlrev_b32 8, %0:v[0]
607       //~gfx[^7]! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte1 src0_sel:dword
608       INS(1, 8)
609       //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
610       //~gfx7! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[0]
611       //~gfx[^7]! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte2 src0_sel:dword
612       INS(2, 8)
613       //! v1: %0:v[0] = v_lshlrev_b32 24, %0:v[1]
614       INS(3, 8)
615       //! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 16
616       INS(0, 16)
617       //! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1]
618       INS(1, 16)
619 
620       #undef INS
621 
622 #define INS(idx, size)                                                                             \
623    bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1),                    \
624               Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size));
625 
626       //>> p_unit_test 1
627       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
628       //! s1: %_:s[0],  s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
629       INS(0, 8)
630       //! s1: %_:s[0],  s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
631       //! s1: %_:s[0],  s1: %_:scc = s_lshl_b32 %_:s[0], 8
632       INS(1, 8)
633       //! s1: %_:s[0],  s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
634       //! s1: %_:s[0],  s1: %_:scc = s_lshl_b32 %_:s[0], 16
635       INS(2, 8)
636       //! s1: %_:s[0],  s1: %_:scc = s_lshl_b32 %_:s[1], 24
637       INS(3, 8)
638       //! s1: %_:s[0],  s1: %_:scc = s_bfe_u32 %_:s[1], 0x100000
639       INS(0, 16)
640       //! s1: %_:s[0],  s1: %_:scc = s_lshl_b32 %_:s[1], 16
641       INS(1, 16)
642 
643       #undef INS
644 
645 #define INS(idx, def_b)                                                                            \
646    bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b),    \
647               Operand::c32(idx), Operand::c32(8u));
648 
649       //>> p_unit_test 2
650       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
651       //~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8
652       //~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
653       INS(0, 0)
654       //~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
655       if (i != GFX7)
656          INS(0, 2)
657       //~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16]
658       //~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
659       INS(1, 0)
660       //~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
661       if (i != GFX7)
662          INS(1, 2)
663 
664       #undef INS
665 
666       finish_to_hw_instr_test();
667 
668       //! s_endpgm
669    }
670 END_TEST
671 
672 BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc)
673    if (!setup_cs(NULL, GFX10))
674       return;
675 
676    PhysReg reg_s0{0};
677    PhysReg reg_s1{1};
678    PhysReg v0_lo{256};
679    PhysReg v0_b3{256};
680    v0_b3.reg_b += 3;
681    PhysReg v1_lo{257};
682 
683    //>> p_unit_test 0
684    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
685 
686    /* It would be better if the scc=s0 copy was done later, but handle_operands() is complex
687     * enough
688     */
689 
690    //! s1: %0:scc = s_cmp_lg_i32 %0:s[0], 0
691    //! s1: %0:m0 = s_mov_b32 %0:scc
692    //! lv1: %0:v[0] = v_mov_b32 %0:v[1]
693    //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
694    //! lv1: %0:v[0] = v_mov_b32 %0:v[1]
695    //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
696    //! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0
697    Instruction *instr = bld.pseudo(
698       aco_opcode::p_parallelcopy,
699       Definition(scc, s1), Definition(v0_lo, v1.as_linear()),
700       Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear()));
701    instr->pseudo().scratch_sgpr = m0;
702 
703    finish_to_hw_instr_test();
704 END_TEST
705 
706 BEGIN_TEST(to_hw_instr.swap_linear_vgpr)
707    if (!setup_cs(NULL, GFX10))
708       return;
709 
710    PhysReg reg_v0{256};
711    PhysReg reg_v1{257};
712    RegClass v1_linear = v1.as_linear();
713 
714    //>> p_unit_test 0
715    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
716 
717    Instruction *instr = bld.pseudo(
718       aco_opcode::p_parallelcopy,
719       Definition(reg_v0, v1_linear), Definition(reg_v1, v1_linear),
720       Operand(reg_v1, v1_linear), Operand(reg_v0, v1_linear));
721    instr->pseudo().scratch_sgpr = m0;
722 
723    finish_to_hw_instr_test();
724 END_TEST
725