1# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
2
3# RUN: llc -march=amdgcn  -verify-machineinstrs -mattr=+volcanic-islands -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s
4# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s
5---
6# Trivial clause at beginning of program
7name: trivial_clause_load_flat4_x1
8
9body: |
10  bb.0:
11    ; GCN-LABEL: name: trivial_clause_load_flat4_x1
12    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
13    ; GCN-NEXT: S_ENDPGM 0
14
15    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
16    S_ENDPGM 0
17...
18---
19# Trivial clause at beginning of program
20name: trivial_clause_load_flat4_x2
21
22body: |
23  bb.0:
24    ; GCN-LABEL: name: trivial_clause_load_flat4_x2
25    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
26    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
27    ; GCN-NEXT: S_ENDPGM 0
28
29    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
30    $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
31    S_ENDPGM 0
32...
33---
34# Trivial clause at beginning of program
35name: trivial_clause_load_flat4_x3
36
37body: |
38  bb.0:
39    ; GCN-LABEL: name: trivial_clause_load_flat4_x3
40    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
41    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec, implicit $flat_scr
42    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr
43    ; GCN-NEXT: S_ENDPGM 0
44
45    $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
46    $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec, implicit $flat_scr
47    $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr
48    S_ENDPGM 0
49...
50---
51# Trivial clause at beginning of program
52name: trivial_clause_load_flat4_x4
53
54body: |
55  bb.0:
56    ; GCN-LABEL: name: trivial_clause_load_flat4_x4
57    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
58    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
59    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr
60    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr
61    ; GCN-NEXT: S_ENDPGM 0
62
63    $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
64    $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
65    $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr
66    $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr
67    S_ENDPGM 0
68...
69---
70# Reuse of same input pointer is OK
71
72name: trivial_clause_load_flat4_x2_sameptr
73body: |
74  bb.0:
75    ; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
76    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
77    ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
78    ; GCN-NEXT: S_ENDPGM 0
79
80    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
81    $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
82    S_ENDPGM 0
83...
84---
85# 32-bit load partially clobbers its own ptr reg
86name: flat_load4_overwrite_ptr_lo
87
88body: |
89  bb.0:
90    ; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
91    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
92    ; GCN-NEXT: S_ENDPGM 0
93
94    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
95    S_ENDPGM 0
96...
97---
98# 32-bit load partially clobbers its own ptr reg
99name: flat_load4_overwrite_ptr_hi
100
101body: |
102  bb.0:
103    ; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
104    ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
105    ; GCN-NEXT: S_ENDPGM 0
106
107    $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
108    S_ENDPGM 0
109...
110---
111# 64-bit load clobbers its own ptr reg
112name: flat_load8_overwrite_ptr
113
114body: |
115  bb.0:
116    ; GCN-LABEL: name: flat_load8_overwrite_ptr
117    ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
118    ; GCN-NEXT: S_ENDPGM 0
119
120    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
121    S_ENDPGM 0
122...
123---
124# vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt
125# breaks the clause.
126
127
128name: break_clause_at_max_clause_size_flat_load4
129
130body: |
131  bb.0:
132    ; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
133    ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
134    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
135    ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
136    ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
137    ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
138    ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
139    ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
140    ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
141    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
142    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
143    ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
144    ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
145    ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
146    ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
147    ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
148    ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
149    ; XNACK-NEXT: S_NOP 0
150    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
151    ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
152    ; GCN-NEXT: S_ENDPGM 0
153
154    $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
155    $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
156    $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
157    $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
158
159    $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
160    $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
161    $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
162    $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
163
164    $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
165    $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
166    $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
167    $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
168
169    $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
170    $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
171    $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
172    $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
173
174    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
175    $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
176    S_ENDPGM 0
177...
178---
179
180name: break_clause_simple_load_flat4_lo_ptr
181
182body: |
183  bb.0:
184    ; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
185    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
186    ; XNACK-NEXT: S_NOP 0
187    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
188    ; GCN-NEXT: S_ENDPGM 0
189
190    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
191    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
192    S_ENDPGM 0
193...
194---
195
196name: break_clause_simple_load_flat4_hi_ptr
197
198body: |
199  bb.0:
200    ; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
201    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
202    ; XNACK-NEXT: S_NOP 0
203    ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
204    ; GCN-NEXT: S_ENDPGM 0
205
206    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
207    $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
208    S_ENDPGM 0
209...
210---
211
212name: break_clause_simple_load_flat8_ptr
213
214body: |
215  bb.0:
216    ; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
217    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
218    ; XNACK-NEXT: S_NOP 0
219    ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
220    ; GCN-NEXT: S_ENDPGM 0
221
222    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
223    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
224    S_ENDPGM 0
225...
226---
227
228
229name: break_clause_simple_load_flat16_ptr
230
231body: |
232  bb.0:
233    ; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
234    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
235    ; XNACK-NEXT: S_NOP 0
236    ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
237    ; GCN-NEXT: S_ENDPGM 0
238    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
239    $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
240    S_ENDPGM 0
241...
242---
243
244# The clause is broken by the waitcnt inserted at the end of the
245# block, so no nop is needed.
246
247
248name: break_clause_block_boundary_load_flat8_ptr
249
250body: |
251  ; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
252  ; GCN: bb.0:
253  ; GCN-NEXT:   successors: %bb.1(0x80000000)
254  ; GCN:   $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
255  ; GCN: bb.1:
256  ; XNACK-NEXT:  S_NOP 0
257  ; GCN-NEXT:   $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
258  ; GCN-NEXT:   S_ENDPGM 0
259
260  bb.0:
261    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
262
263  bb.1:
264    $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
265    S_ENDPGM 0
266...
267---
268# The load clobbers the pointer of the store, so it needs to break.
269
270name: break_clause_store_load_into_ptr_flat4
271
272body: |
273  bb.0:
274    ; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
275    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
276    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
277    ; GCN-NEXT: S_ENDPGM 0
278
279    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
280    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
281    S_ENDPGM 0
282...
283---
284# The load clobbers the data of the store, so it needs to break.
285# FIXME: Would it be better to s_nop and wait later?
286
287name: break_clause_store_load_into_data_flat4
288
289body: |
290  bb.0:
291    ; GCN-LABEL: name: break_clause_store_load_into_data_flat4
292    ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
293    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
294    ; GCN-NEXT: S_ENDPGM 0
295
296    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
297    $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
298    S_ENDPGM 0
299...
300---
301# Regular VALU instruction breaks clause, no nop needed
302
303name: valu_inst_breaks_clause
304
305body: |
306  bb.0:
307    ; GCN-LABEL: name: valu_inst_breaks_clause
308    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
309    ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
310    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
311    ; GCN-NEXT: S_ENDPGM 0
312
313    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
314    $vgpr8 = V_MOV_B32_e32 0, implicit $exec
315    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
316    S_ENDPGM 0
317...
318---
319# Regular SALU instruction breaks clause, no nop needed
320
321name: salu_inst_breaks_clause
322
323body: |
324  bb.0:
325    ; GCN-LABEL: name: salu_inst_breaks_clause
326    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
327    ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
328    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
329    ; GCN-NEXT: S_ENDPGM 0
330
331    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
332    $sgpr8 = S_MOV_B32 0
333    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
334    S_ENDPGM 0
335...
336---
337
338name: ds_inst_breaks_clause
339
340body: |
341  bb.0:
342    ; GCN-LABEL: name: ds_inst_breaks_clause
343    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
344    ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
345    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
346    ; GCN-NEXT: S_ENDPGM 0
347
348    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
349    $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
350    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
351    S_ENDPGM 0
352...
353---
354
355name: smrd_inst_breaks_clause
356
357body: |
358  bb.0:
359    ; GCN-LABEL: name: smrd_inst_breaks_clause
360    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
361    ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
362    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
363    ; GCN-NEXT: S_ENDPGM 0
364
365    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
366    $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
367    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
368    S_ENDPGM 0
369...
370---
371# FIXME: Should this be handled?
372name: implicit_use_breaks_clause
373
374body: |
375  bb.0:
376    ; GCN-LABEL: name: implicit_use_breaks_clause
377    ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
378    ; XNACK-NEXT: S_NOP 0
379    ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
380    ; GCN-NEXT: S_ENDPGM 0
381
382    $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
383    $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
384    S_ENDPGM 0
385...
386---
387name: trivial_clause_load_mubuf4_x2
388
389body: |
390  bb.0:
391    ; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
392    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
393    ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
394    ; GCN-NEXT: S_ENDPGM 0
395
396    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
397    $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
398    S_ENDPGM 0
399...
400---
401name: break_clause_simple_load_mubuf_offen_ptr
402
403body: |
404  bb.0:
405    ; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
406    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
407    ; XNACK-NEXT: S_NOP 0
408    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
409    ; GCN-NEXT: S_ENDPGM 0
410
411    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
412    $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
413    S_ENDPGM 0
414...
415---
416# BUFFER instructions overwriting their own inputs is supposedly OK.
417
418name: mubuf_load4_overwrite_ptr
419
420body: |
421  bb.0:
422    ; GCN-LABEL: name: mubuf_load4_overwrite_ptr
423    ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
424    ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
425    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
426    ; GCN-NEXT: S_ENDPGM 0
427    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
428    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
429    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
430    S_ENDPGM 0
431...
432---
433# Break a clause from interference between mubuf and flat instructions
434
435name: break_clause_flat_load_mubuf_load
436
437body: |
438  bb.0:
439    ; GCN-LABEL: name: break_clause_flat_load_mubuf_load
440    ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
441    ; XNACK-NEXT: S_NOP 0
442    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
443    ; GCN-NEXT: S_ENDPGM 0
444
445    $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
446    $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
447    S_ENDPGM 0
448...
449# Break a clause from interference between mubuf and flat instructions
450
451# GCN-LABEL: name: break_clause_mubuf_load_flat_load
452# GCN: bb.0:
453# GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
454# XNACK-NEXT: S_NOP 0
455# GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3
456# GCN-NEXT: S_ENDPGM 0
457name: break_clause_mubuf_load_flat_load
458
459body: |
460  bb.0:
461    $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
462    $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
463
464    S_ENDPGM 0
465...
466---
467
468name: break_clause_atomic_rtn_into_ptr_flat4
469
470body: |
471  bb.0:
472    ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
473    ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
474    ; XNACK-NEXT: S_NOP 0
475    ; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, implicit $exec, implicit $flat_scr
476    ; GCN-NEXT: S_ENDPGM 0
477
478    $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
479    $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, implicit $exec, implicit $flat_scr
480    S_ENDPGM 0
481...
482---
483name: break_clause_atomic_nortn_ptr_load_flat4
484
485body: |
486  bb.0:
487    ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
488    ; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
489    ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
490    ; GCN-NEXT: S_ENDPGM 0
491
492    FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
493    $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
494    S_ENDPGM 0
495...
496---
497
498name: break_clause_atomic_rtn_into_ptr_mubuf4
499
500body: |
501  bb.0:
502    ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
503    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
504    ; XNACK-NEXT: S_NOP 0
505    ; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, implicit $exec
506    ; GCN-NEXT: S_ENDPGM 0
507
508    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
509    $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, implicit $exec
510    S_ENDPGM 0
511...
512---
513
514name: break_clause_atomic_nortn_ptr_load_mubuf4
515
516body: |
517  bb.0:
518    ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
519    ; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
520    ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
521    ; GCN-NEXT: S_ENDPGM 0
522
523    BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
524    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
525    S_ENDPGM 0
526...
527---
528# Make sure there is no assert on mubuf instructions which do not have
529# vaddr, and don't add register to track.
530name: no_break_clause_mubuf_load_novaddr
531
532body: |
533  bb.0:
534    ; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
535    ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
536    ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
537    ; GCN-NEXT: S_ENDPGM 0
538    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
539    $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
540    S_ENDPGM 0
541...
542---
543# Loads and stores using different addresses theoretically does not
544# need a nop
545name: mix_load_store_clause
546body: |
547  bb.0:
548    ; GCN-LABEL: name: mix_load_store_clause
549    ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
550    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
551    ; XNACK-NEXT: S_NOP 0
552    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
553    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
554
555    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
556    $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
557    FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
558    $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
559    S_ENDPGM 0
560...
561---
562# Loads and stores using the same address needs a nop.
563
564name: mix_load_store_clause_same_address
565body: |
566  bb.0:
567    ; GCN-LABEL: name: mix_load_store_clause_same_address
568    ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
569    ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
570    ; XNACK-NEXT: S_NOP 0
571    ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
572    ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
573
574    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
575    $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
576    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
577    $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
578    S_ENDPGM 0
579...
580