# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s

# RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+volcanic-islands -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s

# Each function below is run through the post-RA-hazard-rec pass only. The
# S_NOP lines are checked solely under the XNACK check prefix, which is enabled
# only by the carrizo invocation; the other two invocations expect the
# instruction stream to come out with no nop inserted.
---
# Trivial clause at beginning of program
name: trivial_clause_load_flat4_x1

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_flat4_x1
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# Trivial clause at beginning of program
# Two loads whose address pairs and destinations do not overlap; the sequence
# is expected to be left unchanged.
name: trivial_clause_load_flat4_x2

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_flat4_x2
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# Trivial clause at beginning of program
# Three loads; no destination overlaps any address pair, so the sequence is
# expected to be left unchanged.
name: trivial_clause_load_flat4_x3

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_flat4_x3
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# Trivial clause at beginning of program
# Four loads; no destination overlaps any address pair, so the sequence is
# expected to be left unchanged.
name: trivial_clause_load_flat4_x4

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_flat4_x4
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# Reuse of same input pointer is OK
# Both loads read $vgpr2_vgpr3 and neither writes it; no nop is expected.

name: trivial_clause_load_flat4_x2_sameptr
body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# 32-bit load partially clobbers its own ptr reg
# The destination is the low register of the load's own address pair; the
# single load is expected to be left alone.
name: flat_load4_overwrite_ptr_lo

body: |
  bb.0:
    ; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# 32-bit load partially clobbers its own ptr reg
# Same as the _lo variant, but the destination is the high register of the
# address pair.
name: flat_load4_overwrite_ptr_hi

body: |
  bb.0:
    ; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
    ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# 64-bit load clobbers its own ptr reg
# Destination pair and address pair are the same registers; the single load is
# expected to be left alone.
name: flat_load8_overwrite_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: flat_load8_overwrite_ptr
    ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt
# breaks the clause.
#
# Sixteen loads from $vgpr0_vgpr1 write $vgpr2 through $vgpr17; the seventeenth
# load then uses $vgpr2_vgpr3 as its address.
# NOTE(review): no waitcnt appears in this input. What the checks pin down is a
# nop, under the XNACK prefix only, in front of the seventeenth load - confirm
# the wording above still describes the intent.


name: break_clause_at_max_clause_size_flat_load4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
    ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

    $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

    $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

    $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
    S_ENDPGM 0
...
---
# The second load writes $vgpr2, the low half of the first load's address pair
# $vgpr2_vgpr3. A nop between the two loads is expected under the XNACK prefix
# only.

name: break_clause_simple_load_flat4_lo_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# The second load writes $vgpr3, the high half of the first load's address pair
# $vgpr2_vgpr3. A nop between the two loads is expected under the XNACK prefix
# only.

name: break_clause_simple_load_flat4_hi_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# The second 64-bit load writes $vgpr2_vgpr3, the whole address pair of the
# first load. A nop between the two loads is expected under the XNACK prefix
# only.

name: break_clause_simple_load_flat8_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# The second load is a DWORDX4 whose destination $vgpr2_vgpr3_vgpr4_vgpr5
# covers the first load's address pair $vgpr2_vgpr3. A nop between the two
# loads is expected under the XNACK prefix only.


name: break_clause_simple_load_flat16_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---

# The clause is broken by the waitcnt inserted at the end of the
# block, so no nop is needed.
#
# NOTE(review): the checks below do expect a nop at the start of bb.1 under the
# XNACK prefix, and no waitcnt appears in this input - confirm whether the
# comment above is still accurate.


name: break_clause_block_boundary_load_flat8_ptr

body: |
  ; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.1(0x80000000)
  ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN: bb.1:
  ; XNACK-NEXT: S_NOP 0
  ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
  ; GCN-NEXT: S_ENDPGM 0

  bb.0:
    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr

  bb.1:
    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# The load clobbers the pointer of the store, so it needs to break.
# NOTE(review): the checks below expect the store to be followed directly by
# the load, with no nop under any prefix - confirm which behaviour is intended.

name: break_clause_store_load_into_ptr_flat4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# The load clobbers the data of the store, so it needs to break.
# FIXME: Would it be better to s_nop and wait later?
# NOTE(review): the checks below expect the store to be followed directly by
# the load, with no nop under any prefix - confirm which behaviour is intended.

name: break_clause_store_load_into_data_flat4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_store_load_into_data_flat4
    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# Regular VALU instruction breaks clause, no nop needed
# The V_MOV_B32 sits between a load from $vgpr2_vgpr3 and a load that writes
# $vgpr2.

name: valu_inst_breaks_clause

body: |
  bb.0:
    ; GCN-LABEL: name: valu_inst_breaks_clause
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# Regular SALU instruction breaks clause, no nop needed
# The S_MOV_B32 sits between a load from $vgpr2_vgpr3 and a load that writes
# $vgpr2.

name: salu_inst_breaks_clause

body: |
  bb.0:
    ; GCN-LABEL: name: salu_inst_breaks_clause
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $sgpr8 = S_MOV_B32 0
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# A DS_READ_B32 sits between a load from $vgpr2_vgpr3 and a load that writes
# $vgpr2; no nop is expected under any prefix.

name: ds_inst_breaks_clause

body: |
  bb.0:
    ; GCN-LABEL: name: ds_inst_breaks_clause
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# An S_LOAD_DWORD_IMM sits between a load from $vgpr2_vgpr3 and a load that
# writes $vgpr2; no nop is expected under any prefix.

name: smrd_inst_breaks_clause

body: |
  bb.0:
    ; GCN-LABEL: name: smrd_inst_breaks_clause
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# FIXME: Should this be handled?
# The first load only has an implicit use of $vgpr4_vgpr5, which the second
# load overwrites. The checks currently expect a nop under the XNACK prefix.
name: implicit_use_breaks_clause

body: |
  bb.0:
    ; GCN-LABEL: name: implicit_use_breaks_clause
    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
    $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# Two buffer loads whose VGPR operands and destinations do not overlap; the
# sequence is expected to be left unchanged.
name: trivial_clause_load_mubuf4_x2

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0
...
---
# The second buffer load writes $vgpr2, the VGPR operand read by the first. A
# nop between the two is expected under the XNACK prefix only.
name: break_clause_simple_load_mubuf_offen_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0
...
---
# BUFFER instructions overwriting their own inputs is supposedly OK.
# The load's destination $vgpr0 is also its VGPR operand; the following two
# V_MOV_B32 instructions are expected to stay in place with no nop.

name: mubuf_load4_overwrite_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: mubuf_load4_overwrite_ptr
    ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
    ; GCN-NEXT: S_ENDPGM 0
    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
    S_ENDPGM 0
...
---
# Break a clause from interference between mubuf and flat instructions
# Here the FLAT load reads $vgpr2_vgpr3 and the following buffer load writes
# $vgpr2.

name: break_clause_flat_load_mubuf_load

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_flat_load_mubuf_load
    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0
...
# Break a clause from interference between mubuf and flat instructions
# Here the buffer load reads $vgpr1 and the following FLAT load writes $vgpr1.
# NOTE(review): unlike the other functions in this file, this one is not
# introduced by a '---' marker and keeps its check lines outside the body.

# GCN-LABEL: name: break_clause_mubuf_load_flat_load
# GCN: bb.0:
# GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
# XNACK-NEXT: S_NOP 0
# GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3
# GCN-NEXT: S_ENDPGM 0
name: break_clause_mubuf_load_flat_load

body: |
  bb.0:
    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr

    S_ENDPGM 0
...
---
# A returning FLAT atomic writes $vgpr4, the low half of the preceding load's
# address pair $vgpr4_vgpr5. A nop between the two is expected under the XNACK
# prefix only.

name: break_clause_atomic_rtn_into_ptr_flat4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
    ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# A FLAT atomic with no result register reads $vgpr2, which the following load
# then writes. The checks expect no nop under any prefix.
name: break_clause_atomic_nortn_ptr_load_flat4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
    ; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_ENDPGM 0

    FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# A returning buffer atomic writes $vgpr2, which the preceding buffer load
# reads. A nop between the two is expected under the XNACK prefix only.

name: break_clause_atomic_rtn_into_ptr_mubuf4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, implicit $exec
    ; GCN-NEXT: S_ENDPGM 0

    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, implicit $exec
    S_ENDPGM 0
...
---
# A buffer atomic with no result register reads $vgpr1, which the following
# buffer load then writes. The checks expect no nop under any prefix.

name: break_clause_atomic_nortn_ptr_load_mubuf4

body: |
  bb.0:
    ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
    ; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM 0

    BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0
...
---
# Make sure there is no assert on mubuf instructions which do not have
# vaddr, and don't add register to track.
# Both loads use the _OFFSET form, whose operand list carries no VGPR; the
# sequence is expected to be left unchanged.
name: no_break_clause_mubuf_load_novaddr

body: |
  bb.0:
    ; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: S_ENDPGM 0
    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0
...
---
# Loads and stores using different addresses theoretically do not
# need a nop
# The checks nevertheless expect one, under the XNACK prefix only, between the
# first load and the second store.
name: mix_load_store_clause
body: |
  bb.0:
    ; GCN-LABEL: name: mix_load_store_clause
    ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr

    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...
---
# Loads and stores using the same address need a nop.
# Both stores here use $vgpr0_vgpr1; the nop is expected under the XNACK prefix
# only, between the first load and the second store.

name: mix_load_store_clause_same_address
body: |
  bb.0:
    ; GCN-LABEL: name: mix_load_store_clause_same_address
    ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr

    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...