/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
#include "helpers.h"

using namespace aco;

/* Reading guide for this file:
 *
 * Each test emits pseudo instructions through `bld`, separated by p_unit_test
 * markers carrying a running index, and then calls finish_to_hw_instr_test().
 *
 * The comment lines that start with "//" immediately followed by '!', ">>",
 * '~' or ';' are not ordinary remarks. They appear to be the patterns that the
 * test framework matches against the resulting instruction listing
 * (NOTE(review): confirm the exact syntax against the framework's checker), so
 * their text and order must be kept verbatim. Explanatory comments in this
 * file therefore use the block-comment form only.
 */

/* Sub-dword swaps and copies: p_parallelcopy, p_create_vector and
 * p_split_vector with v1b/v2b/v3b/v6b operands that overlap between v0 and v1.
 *
 * The first loop runs for GFX6..GFX7, where the expectations consist of
 * full-register shifts, v_alignbyte_b32 and v_xor_b32 sequences. The second
 * loop runs for GFX8..GFX9, where the expectations use dst_sel/src0_sel
 * modifiers (and v_swap_b32 / v_pack_b32_f16 on GFX9).
 */
BEGIN_TEST(to_hw_instr.swap_subdword)
   /* Registers 256.. show up as v[0].. in the expectation lines. */
   PhysReg v0_lo{256};
   PhysReg v0_hi{256};
   PhysReg v0_b1{256};
   PhysReg v0_b3{256};
   PhysReg v1_lo{257};
   PhysReg v1_hi{257};
   PhysReg v1_b1{257};
   PhysReg v1_b3{257};
   PhysReg v2_lo{258};
   PhysReg v3_lo{259};
   /* Byte offsets inside the register: _hi = byte 2, _b1 = byte 1, _b3 = byte 3. */
   v0_hi.reg_b += 2;
   v1_hi.reg_b += 2;
   v0_b1.reg_b += 1;
   v1_b1.reg_b += 1;
   v0_b3.reg_b += 3;
   v1_b3.reg_b += 3;

   for (unsigned i = GFX6; i <= GFX7; i++) {
      /* Skip this chip when setup_cs() reports failure. */
      if (!setup_cs(NULL, (chip_class)i))
         continue;

      /* Swap of the low 16 bits of v0 and v1. */
      //~gfx[67]>> p_unit_test 0
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v1_lo, v2b),
                 Operand(v1_lo, v2b), Operand(v0_lo, v2b));

      /* Tests 1-4: p_create_vector from 16-bit operands, with v0 used both as
       * destination and as one of the sources.
       */
      //~gfx[67]! p_unit_test 1
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v1),
                 Operand(v1_lo, v2b), Operand(v0_lo, v2b));

      //~gfx[67]! p_unit_test 2
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v6b), Operand(v1_lo, v2b),
                 Operand(v0_lo, v2b), Operand(v2_lo, v2b));

      //~gfx[67]! p_unit_test 3
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
      //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v2),
                 Operand(v1_lo, v2b), Operand(v0_lo, v2b),
                 Operand(v2_lo, v2b), Operand(v3_lo, v2b));

      //~gfx[67]! p_unit_test 4
      //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
      //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[2][0:16], %0:v[1][16:32], 2
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:16], %0:v[0][16:32], 2
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v2),
                 Operand(v1_lo, v2b), Operand(v2_lo, v2b),
                 Operand(v0_lo, v2b), Operand(v3_lo, v2b));

      /* Tests 5-8: p_split_vector into 16-bit definitions that overlap the source. */
      //~gfx[67]! p_unit_test 5
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Operand(v0_lo, v1));

      //~gfx[67]! p_unit_test 6
      //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Definition(v2_lo, v2b), Operand(v0_lo, v6b));

      //~gfx[67]! p_unit_test 7
      //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                 Definition(v2_lo, v2b), Definition(v3_lo, v2b),
                 Operand(v0_lo, v2));

      //~gfx[67]! p_unit_test 8
      //~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]
      //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v2b), Definition(v2_lo, v2b),
                 Definition(v0_lo, v2b), Definition(v3_lo, v2b),
                 Operand(v0_lo, v2));

      /* Tests 9-15: the same patterns with 8-bit (v1b) operands. */
      //~gfx[67]! p_unit_test 9
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1b), Definition(v1_lo, v1b),
                 Operand(v1_lo, v1b), Operand(v0_lo, v1b));

      //~gfx[67]! p_unit_test 10
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v2b),
                 Operand(v1_lo, v1b), Operand(v0_lo, v1b));

      //~gfx[67]! p_unit_test 11
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v3b), Operand(v1_lo, v1b),
                 Operand(v0_lo, v1b), Operand(v2_lo, v1b));

      //~gfx[67]! p_unit_test 12
      //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
      //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
      //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
      //~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
      //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
      bld.pseudo(aco_opcode::p_create_vector,
                 Definition(v0_lo, v1),
                 Operand(v1_lo, v1b), Operand(v0_lo, v1b),
                 Operand(v2_lo, v1b), Operand(v3_lo, v1b));

      /* All four bytes come from the same source byte of v0; m0 is provided as
       * the scratch SGPR and appears in the last two expected instructions.
       */
      //~gfx[67]! p_unit_test 13
      //~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_mul_u32_u24 0x101, %0:v[0][0:8]
      //~gfx[67]! v2b: %0:v[0][0:16] = v_and_b32 0xffff, %0:v[0][0:16]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_cvt_pk_u16_u32 %0:v[0][0:16], %0:v[0][0:8]
      //~gfx[67]! v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
      //~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
      //~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
      Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector,
                                       Definition(v0_lo, v1),
                                       Operand(v0_lo, v1b), Operand(v0_lo, v1b),
                                       Operand(v0_lo, v1b), Operand(v0_lo, v1b));
      pseudo->pseudo().scratch_sgpr = m0;

      //~gfx[67]! p_unit_test 14
      //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
      //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v1b), Definition(v0_lo, v1b),
                 Operand(v0_lo, v2b));

      //~gfx[67]! p_unit_test 15
      //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
      //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
      //~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
      //~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
      bld.pseudo(aco_opcode::p_split_vector,
                 Definition(v1_lo, v1b), Definition(v0_lo, v1b),
                 Definition(v2_lo, v1b), Definition(v3_lo, v1b),
                 Operand(v0_lo, v1));

      //~gfx[67]! s_endpgm

      finish_to_hw_instr_test();
   }

   for (unsigned i = GFX8; i <= GFX9; i++) {
      /* Skip this chip when setup_cs() reports failure. */
      if (!setup_cs(NULL, (chip_class)i))
         continue;

      /* Swap of the two 16-bit halves of v0. */
      //~gfx[89]>> p_unit_test 0
      //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      //~gfx9! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand(v0_hi, v2b), Operand(v0_lo, v2b));

      /* Tests 1-9: swaps between v0 and v1 in which the two directions have
       * different sizes or are split into several sub-dword copies.
       */
      //~gfx[89]! p_unit_test 1
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_lo, v2b),
                 Operand(v1_lo, v1), Operand(v0_lo, v2b));

      //~gfx[89]! p_unit_test 2
      //~gfx[89]! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][0:16] dst_sel:uword1 dst_preserve src0_sel:uword0
      //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
      //~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
      //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:uword0
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b),
                 Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b));

      //~gfx[89]! p_unit_test 3
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0
      //~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_b3, v1b),
                 Operand(v1_lo, v1), Operand(v0_b3, v1b));

      //~gfx[89]! p_unit_test 4
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
      //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1), Definition(v1_lo, v1b),
                 Operand(v1_lo, v1), Operand(v0_lo, v1b));

      //~gfx[89]! p_unit_test 5
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
      //~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
      //~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),
                 Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1));

      //~gfx[89]! p_unit_test 6
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
                 Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1));

      //~gfx[89]! p_unit_test 7
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
      //~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
      //~gfx[89]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
                 Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1));

      //~gfx[89]! p_unit_test 8
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
      //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
      //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte3
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v3b), Definition(v1_lo, v3b),
                 Operand(v1_lo, v3b), Operand(v0_lo, v3b));

      //~gfx[89]! p_unit_test 9
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
      //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
      //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),
                 Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b));

      /* 16-bit swap starting at byte 1; the expectation swaps it one byte at a time. */
      //~gfx[89]! p_unit_test 10
      //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
      //~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
      //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
      //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
      //~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
      //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2 src1_sel:ubyte2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
      bld.pseudo(aco_opcode::p_parallelcopy,
                 Definition(v0_b1, v2b), Definition(v1_b1, v2b),
                 Operand(v1_b1, v2b), Operand(v0_b1, v2b));

      /* v0 is overwritten with a constant while its high half is also a copy
       * source; the expectation reads v0 before the constant is written.
       */
      //~gfx[89]! p_unit_test 11
      //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_sel:uword0 dst_preserve src0_sel:uword1
      //~gfx[89]! v1: %0:v[0] = v_mov_b32 42
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
                 Operand::c32(42u), Operand(v0_hi, v2b));

      //~gfx[89]! s_endpgm

      finish_to_hw_instr_test();
   }
END_TEST

/* p_parallelcopy with constant sources (Operand::c16 / c8 / c32 / zero)
 * written to sub-dword destinations, for GFX9..GFX10.
 */
BEGIN_TEST(to_hw_instr.subdword_constant)
   PhysReg v0_lo{256};
   PhysReg v0_hi{256};
   PhysReg v0_b1{256};
   PhysReg v1_lo{257};
   PhysReg v1_hi{257};
   /* Byte offsets inside the register: _hi = byte 2, _b1 = byte 1. */
   v0_hi.reg_b += 2;
   v0_b1.reg_b += 1;
   v1_hi.reg_b += 2;

   for (unsigned i = GFX9; i <= GFX10; i++) {
      /* Skip this chip when setup_cs() reports failure. */
      if (!setup_cs(NULL, (chip_class)i))
         continue;

      /* 16-bit pack */
      //>> p_unit_test 0
      //! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x3800), Operand(v1_hi, v2b));

      //! p_unit_test 1
      //~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32]
      //~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0]
      //~gfx10! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand(v1_hi, v2b));

      //! p_unit_test 2
      //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
      //~gfx10! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16]
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand(v0_lo, v2b));

      /* Tests 3-5: both halves are constants; a single 32-bit move is expected. */
      //! p_unit_test 3
      //! v1: %_:v[0] = v_mov_b32 0x3c003800
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x3800), Operand::c16(0x3c00));

      //! p_unit_test 4
      //! v1: %_:v[0] = v_mov_b32 0x43064205
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand::c16(0x4306));

      //! p_unit_test 5
      //! v1: %_:v[0] = v_mov_b32 0x38004205
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                 Operand::c16(0x4205), Operand::c16(0x3800));

      /* 16-bit copy */
      //! p_unit_test 6
      //! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_sel:uword0 dst_preserve src0_sel:uword0 src1_sel:dword
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x3800));

      //! p_unit_test 7
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
      //~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x4205));

      //! p_unit_test 8
      //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
      //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0]
      //~gfx10! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0x4205));

      /* Tests 9-10: the 16-bit destination starts at byte 1; the expectations
       * write it as two separate byte moves.
       */
      //! p_unit_test 9
      //! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_sel:ubyte1 dst_preserve src0_sel:dword
      //! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_sel:ubyte2 dst_preserve src0_sel:dword
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x3800));

      //! p_unit_test 10
      //! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_sel:ubyte1 dst_preserve src0_sel:dword
      //! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_sel:ubyte2 dst_preserve src0_sel:dword src1_sel:dword
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x4205));

      /* 8-bit copy */
      //! p_unit_test 11
      //! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_sel:ubyte0 dst_preserve src0_sel:dword src1_sel:dword
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42));

      /* 32-bit and 8-bit copy */
      //! p_unit_test 12
      //! v1: %_:v[0] = v_mov_b32 0
      //! v1b: %_:v[1][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
                 Operand::zero(), Operand::zero(1));

      //! s_endpgm

      finish_to_hw_instr_test();
   }
END_TEST

/* GFX9 only: a parallel copy whose two-dword definition v[1:2] overlaps the
 * cycle formed by the single-dword copies. The expectation is three
 * v_swap_b32 instructions.
 */
BEGIN_TEST(to_hw_instr.self_intersecting_swap)
   if (!setup_cs(NULL, GFX9))
      return;

   PhysReg reg_v1{257};
   PhysReg reg_v2{258};
   PhysReg reg_v3{259};
   PhysReg reg_v7{263};

   //>> p_unit_test 0
   //! v1: %0:v[1], v1: %0:v[2] = v_swap_b32 %0:v[2], %0:v[1]
   //! v1: %0:v[2], v1: %0:v[3] = v_swap_b32 %0:v[3], %0:v[2]
   //! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]
   //! s_endpgm
   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
   //v[1:2] = v[2:3]
   //v3 = v7
   //v7 = v1
   bld.pseudo(aco_opcode::p_parallelcopy,
              Definition(reg_v1, v2), Definition(reg_v3, v1), Definition(reg_v7, v1),
              Operand(reg_v2, v2), Operand(reg_v7, v1), Operand(reg_v1, v1));

   finish_to_hw_instr_test();
END_TEST

/* p_extract for GFX7..GFX9, once per signedness. The signedness is passed to
 * setup_cs() as the "_signed" / "_unsigned" string and to p_extract as its
 * last operand.
 *
 * Three operand shapes are covered, each with its own temporary EXT macro:
 * 32-bit VGPR, 32-bit SGPR (with an scc definition), and 16-bit VGPR.
 */
BEGIN_TEST(to_hw_instr.extract)
   PhysReg s0_lo{0};
   PhysReg s1_lo{1};
   PhysReg v0_lo{256};
   PhysReg v1_lo{257};

   for (unsigned i = GFX7; i <= GFX9; i++) {
      for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
         if (!setup_cs(NULL, (chip_class)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
            continue;

/* 32-bit VGPR extract of element `idx` with element width `size` bits. */
#define EXT(idx, size) \
   bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
              Operand::c32(size), Operand::c32(is_signed));

         /* The next five lines define names that the expectation lines below
          * reference as @v_bfe, @v_shr, @s_bfe, @s_shr and @byte(n); each
          * picks the signed or unsigned spelling from the variant name.
          * NOTE(review): evaluated by the test checker, not by C++ - confirm.
          */
         //; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
         //; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
         //; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
         //; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
         //; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n)

         //>> p_unit_test 0
         bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
         //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
         EXT(0, 8)
         //! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
         EXT(1, 8)
         //! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
         EXT(2, 8)
         //! v1: %_:v[0] = @v_shr 24, %_:v[1]
         EXT(3, 8)
         //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
         EXT(0, 16)
         //! v1: %_:v[0] = @v_shr 16, %_:v[1]
         EXT(1, 16)

#undef EXT

/* 32-bit SGPR extract; scc is listed as a second definition. */
#define EXT(idx, size) \
   bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1), \
              Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed));

         //>> p_unit_test 2
         bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
         //~gfx._unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000
         //~gfx._signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
         EXT(0, 8)
         //! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80008
         EXT(1, 8)
         //! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80010
         EXT(2, 8)
         //! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24
         EXT(3, 8)
         //~gfx._unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000
         //~gfx._signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
         EXT(0, 16)
         //! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16
         EXT(1, 16)

#undef EXT

/* 8-bit extract from a 16-bit VGPR operand located `src_b` bytes into v1. */
#define EXT(idx, src_b) \
   bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b), \
              Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed));

         /* The cases with a source at byte offset 2 are not emitted for GFX7. */
         //>> p_unit_test 4
         bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
         //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
         //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
         EXT(0, 0)
         //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
         if (i != GFX7)
            EXT(0, 2)
         //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
         //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
         EXT(1, 0)
         //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
         if (i != GFX7)
            EXT(1, 2)

#undef EXT

         finish_to_hw_instr_test();

         //! s_endpgm
      }
   }
END_TEST

/* p_insert for GFX7..GFX9, using the same three operand shapes as the extract
 * test: 32-bit VGPR, 32-bit SGPR (with an scc definition), and 16-bit VGPR.
 * Each shape has its own temporary INS macro.
 */
BEGIN_TEST(to_hw_instr.insert)
   PhysReg s0_lo{0};
   PhysReg s1_lo{1};
   PhysReg v0_lo{256};
   PhysReg v1_lo{257};

   for (unsigned i = GFX7; i <= GFX9; i++) {
      /* Skip this chip when setup_cs() reports failure. */
      if (!setup_cs(NULL, (chip_class)i))
         continue;

/* 32-bit VGPR insert at element `idx` with element width `size` bits. */
#define INS(idx, size) \
   bld.pseudo(aco_opcode::p_insert, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
              Operand::c32(size));

      //>> p_unit_test 0
      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
      //! v1: %_:v[0] = v_bfe_u32 %_:v[1], 0, 8
      INS(0, 8)
      //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
      //~gfx7! v1: %0:v[0] = v_lshlrev_b32 8, %0:v[0]
      //~gfx[^7]! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte1 src0_sel:dword
      INS(1, 8)
      //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
      //~gfx7! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[0]
      //~gfx[^7]! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte2 src0_sel:dword
      INS(2, 8)
      //! v1: %0:v[0] = v_lshlrev_b32 24, %0:v[1]
      INS(3, 8)
      //! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 16
      INS(0, 16)
      //! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1]
      INS(1, 16)

#undef INS

/* 32-bit SGPR insert; scc is listed as a second definition. */
#define INS(idx, size) \
   bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1), \
              Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size));

      //>> p_unit_test 1
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
      //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
      INS(0, 8)
      //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
      //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 8
      INS(1, 8)
      //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
      //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 16
      INS(2, 8)
      //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 24
      INS(3, 8)
      //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x100000
      INS(0, 16)
      //! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 16
      INS(1, 16)

#undef INS

/* 8-bit insert into a 16-bit VGPR definition located `def_b` bytes into v0. */
#define INS(idx, def_b) \
   bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b), \
              Operand::c32(idx), Operand::c32(8u));

      /* The cases with a definition at byte offset 2 are not emitted for GFX7. */
      //>> p_unit_test 2
      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
      //~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8
      //~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
      INS(0, 0)
      //~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
      if (i != GFX7)
         INS(0, 2)
      //~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16]
      //~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
      INS(1, 0)
      //~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
      if (i != GFX7)
         INS(1, 2)

#undef INS

      finish_to_hw_instr_test();

      //! s_endpgm
   }
END_TEST

/* GFX10 only: one parallel copy that both writes scc from s0 and copies a
 * linear VGPR (v1 -> v0), with m0 supplied as the scratch SGPR. In the
 * expectation, scc is saved to m0 around the two v_mov_b32 / s_not_b64 pairs
 * and re-created from m0 at the end.
 */
BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc)
   if (!setup_cs(NULL, GFX10))
      return;

   PhysReg reg_s0{0};
   PhysReg reg_s1{1};
   PhysReg v0_lo{256};
   PhysReg v0_b3{256};
   v0_b3.reg_b += 3;
   PhysReg v1_lo{257};

   //>> p_unit_test 0
   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());

   /* It would be better if the scc=s0 copy was done later, but handle_operands() is complex
    * enough
    */

   //! s1: %0:scc = s_cmp_lg_i32 %0:s[0], 0
   //! s1: %0:m0 = s_mov_b32 %0:scc
   //! lv1: %0:v[0] = v_mov_b32 %0:v[1]
   //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
   //! lv1: %0:v[0] = v_mov_b32 %0:v[1]
   //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec
   //! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0
   Instruction *instr = bld.pseudo(
      aco_opcode::p_parallelcopy,
      Definition(scc, s1), Definition(v0_lo, v1.as_linear()),
      Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear()));
   instr->pseudo().scratch_sgpr = m0;

   finish_to_hw_instr_test();
END_TEST

/* GFX10 only: swap of two linear VGPRs (v0 <-> v1) with m0 supplied as the
 * scratch SGPR. Apart from the p_unit_test marker, this test lists no
 * expected instructions.
 */
BEGIN_TEST(to_hw_instr.swap_linear_vgpr)
   if (!setup_cs(NULL, GFX10))
      return;

   PhysReg reg_v0{256};
   PhysReg reg_v1{257};
   RegClass v1_linear = v1.as_linear();

   //>> p_unit_test 0
   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());

   Instruction *instr = bld.pseudo(
      aco_opcode::p_parallelcopy,
      Definition(reg_v0, v1_linear), Definition(reg_v1, v1_linear),
      Operand(reg_v1, v1_linear), Operand(reg_v0, v1_linear));
   instr->pseudo().scratch_sgpr = m0;

   finish_to_hw_instr_test();
END_TEST