1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination  %s -o - | FileCheck -check-prefix=GCN %s
3
4# Kernels can have no FP
5---
# Entry-function case where the frame index must NOT be folded.
# The load's resource operand is %0 (a copy of $sgpr12_sgpr13_sgpr14_sgpr15),
# which is not the function's scratchRSrcReg, and its soffset operand is fed
# from an S_MOV_B32 rather than written as an immediate.
# The checks expect %stack.0 to stay in the V_MOV_B32 that feeds the load; only
# the S_MOV_B32 0 is expected to be replaced by an immediate 0 operand.
6name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
7tracksRegLiveness: true
8frameInfo:
9  maxAlignment:    4
10  localFrameSize:  4
11stack:
12  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
13machineFunctionInfo:
14  isEntryFunction: true
15  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
16  stackPtrOffsetReg: '$sgpr32'
17body:             |
18  bb.0:
19    liveins: $sgpr12_sgpr13_sgpr14_sgpr15
20
21    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
22    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
23    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
24    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
25    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
26    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
27    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
28    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
29    %1:sreg_32_xm0 = S_MOV_B32 0
30    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
31    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, implicit $exec
32    $vgpr0 = COPY %3
33    SI_RETURN_TO_EPILOG $vgpr0
34
35...
36
37---
# Entry-function case where the frame index must NOT be folded.
# The soffset operand is already an immediate 0, but the load's resource
# operand is %0 (a copy of $sgpr12_sgpr13_sgpr14_sgpr15) instead of the
# function's scratchRSrcReg. The checks expect the output to be unchanged:
# %stack.0 stays in the V_MOV_B32 that feeds the load.
38name: kernel_no_fold_fi_non_stack_rsrc
39tracksRegLiveness: true
40frameInfo:
41  maxAlignment:    4
42  localFrameSize:  4
43stack:
44  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
45machineFunctionInfo:
46  isEntryFunction: true
47  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
48  stackPtrOffsetReg: '$sgpr32'
49body:             |
50  bb.0:
51    liveins: $sgpr12_sgpr13_sgpr14_sgpr15
52
53    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc
54    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
55    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
56    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
57    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
58    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
59    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
60    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
61    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
62    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, 0, implicit $exec
63    $vgpr0 = COPY %3
64    SI_RETURN_TO_EPILOG $vgpr0
65
66...
67
68---
# Entry-function case where the frame index must NOT be folded.
# Both buffer accesses use the function's scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3), but their soffset operand is the virtual
# register %2 defined by S_MOV_B32 0 rather than an immediate.
# The checks expect %stack.0 to stay in its V_MOV_B32, while the S_MOV_B32 is
# replaced by an immediate 0 soffset and disappears from the output.
# NOTE(review): presumably the frame-index fold is rejected because soffset is
# still a register when that fold is attempted - confirm against the pass.
69name: kernel_no_fold_fi_non_stack_soffset
70tracksRegLiveness: true
71frameInfo:
72  maxAlignment:    4
73  localFrameSize:  4
74stack:
75  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
76machineFunctionInfo:
77  isEntryFunction: true
78  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
79  stackPtrOffsetReg: '$sgpr32'
80body:             |
81  bb.0:
82
83    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_soffset
84    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
85    ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
86    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
87    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
88    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
89    ; GCN: S_ENDPGM 0, implicit $vgpr0
90    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
91    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
92    %2:sreg_32_xm0 = S_MOV_B32 0
93
94    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, implicit $exec
95    %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, implicit $exec
96    $vgpr0 = COPY %3
97    S_ENDPGM 0, implicit $vgpr0
98
99...
100
101---
# Entry-function case where the frame index IS folded.
# The store and the load both use the function's scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) with an immediate 0 soffset. The checks expect
# %stack.0 to appear directly as the address operand of both instructions and
# the V_MOV_B32 that materialized it to be gone.
102name: kernel_fold_fi_mubuf
103tracksRegLiveness: true
104frameInfo:
105  maxAlignment:    4
106  localFrameSize:  4
107stack:
108  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
109machineFunctionInfo:
110  isEntryFunction: true
111  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
112  stackPtrOffsetReg: '$sgpr32'
113body:             |
114  bb.0:
115
116    ; GCN-LABEL: name: kernel_fold_fi_mubuf
117    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
118    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
119    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
120    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
121    ; GCN: S_ENDPGM 0, implicit $vgpr0
122    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
123    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
124
125    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
126    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
127    $vgpr0 = COPY %2
128    S_ENDPGM 0, implicit $vgpr0
129
130...
131
132
133# Functions have an unswizzled SP/FP relative to the wave offset
134---
# Non-entry-function case where the frame index must NOT be folded.
# frameOffsetReg and stackPtrOffsetReg are both $sgpr32 here.
# The load's resource operand is %0 (a copy of $sgpr12_sgpr13_sgpr14_sgpr15),
# which is not the function's scratchRSrcReg, and its soffset operand is fed
# from an S_MOV_B32 rather than written as an immediate.
# The checks expect %stack.0 to stay in the V_MOV_B32 that feeds the load; only
# the S_MOV_B32 0 is expected to be replaced by an immediate 0 operand.
135name: function_no_fold_fi_non_stack_rsrc_and_soffset
136tracksRegLiveness: true
137frameInfo:
138  maxAlignment:    4
139  localFrameSize:  4
140stack:
141  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
142machineFunctionInfo:
143  isEntryFunction: false
144  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
145  frameOffsetReg:  '$sgpr32'
146  stackPtrOffsetReg: '$sgpr32'
147body:             |
148  bb.0:
149    liveins: $sgpr12_sgpr13_sgpr14_sgpr15
150
151    ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc_and_soffset
152    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
153    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
154    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
155    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
156    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
157    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
158    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
159    %1:sreg_32_xm0 = S_MOV_B32 0
160    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
161    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, implicit $exec
162    $vgpr0 = COPY %3
163    SI_RETURN_TO_EPILOG $vgpr0
164
165...
166
167---
# Non-entry-function case where the frame index must NOT be folded.
# The soffset operand is already an immediate 0, but the load's resource
# operand is %0 (a copy of $sgpr12_sgpr13_sgpr14_sgpr15) instead of the
# function's scratchRSrcReg. The checks expect the output to be unchanged:
# %stack.0 stays in the V_MOV_B32 that feeds the load.
168name: function_no_fold_fi_non_stack_rsrc
169tracksRegLiveness: true
170frameInfo:
171  maxAlignment:    4
172  localFrameSize:  4
173stack:
174  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
175machineFunctionInfo:
176  isEntryFunction: false
177  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
178  frameOffsetReg:  '$sgpr32'
179  stackPtrOffsetReg: '$sgpr32'
180body:             |
181  bb.0:
182    liveins: $sgpr12_sgpr13_sgpr14_sgpr15
183
184    ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc
185    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
186    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
187    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
188    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec
189    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
190    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
191    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
192    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
193    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, 0, implicit $exec
194    $vgpr0 = COPY %3
195    SI_RETURN_TO_EPILOG $vgpr0
196
197...
198
199---
# Non-entry-function case where the frame index must NOT be folded.
# Both buffer accesses use the function's scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3), but their soffset operand is the virtual
# register %2 defined by S_MOV_B32 0 rather than an immediate, mirroring
# kernel_no_fold_fi_non_stack_soffset.
# The checks expect %stack.0 to stay in its V_MOV_B32, while the S_MOV_B32 is
# replaced by an immediate 0 soffset and disappears from the output.
# NOTE(review): the GCN lines were written to match the kernel variant's
# output; regenerate them with utils/update_mir_test_checks.py to confirm.
200name: function_no_fold_fi_non_stack_soffset
201tracksRegLiveness: true
202frameInfo:
203  maxAlignment:    4
204  localFrameSize:  4
205stack:
206  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
207machineFunctionInfo:
208  isEntryFunction: false
209  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
210  frameOffsetReg:  '$sgpr32'
211  stackPtrOffsetReg: '$sgpr32'
212body:             |
213  bb.0:
214
215    ; GCN-LABEL: name: function_no_fold_fi_non_stack_soffset
216    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
217    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
218    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
219    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
220    ; GCN: S_ENDPGM 0, implicit $vgpr0
221    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
222    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    %2:sreg_32_xm0 = S_MOV_B32 0
223
224    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, implicit $exec
225    %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, implicit $exec
226    $vgpr0 = COPY %3
227    S_ENDPGM 0, implicit $vgpr0
228
229...
230
231---
# Non-entry-function case where the frame index IS folded.
# The store and the load both use the function's scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) with an immediate 0 soffset. The checks expect
# %stack.0 to appear directly as the address operand of both instructions,
# with soffset left as immediate 0 and the V_MOV_B32 of %stack.0 gone.
232name: function_fold_fi_mubuf_wave_relative
233tracksRegLiveness: true
234frameInfo:
235  maxAlignment:    4
236  localFrameSize:  4
237stack:
238  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
239machineFunctionInfo:
240  isEntryFunction: false
241  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
242  frameOffsetReg:  '$sgpr32'
243  stackPtrOffsetReg: '$sgpr32'
244body:             |
245  bb.0:
246
247    ; GCN-LABEL: name: function_fold_fi_mubuf_wave_relative
248    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
249    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
250    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
251    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
252    ; GCN: S_ENDPGM 0, implicit $vgpr0
253    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
254    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
255
256    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
257    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
258    $vgpr0 = COPY %2
259    S_ENDPGM 0, implicit $vgpr0
260
261...
262
263---
# Non-entry-function case where the frame index IS folded.
# The store and the load both use the function's scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) with an immediate 0 soffset. The checks expect
# %stack.0 to appear directly as the address operand of both instructions,
# with soffset left as immediate 0 and the V_MOV_B32 of %stack.0 gone.
# NOTE(review): apart from its name, this input is identical to
# function_fold_fi_mubuf_wave_relative. Confirm whether the "stack relative"
# variant was meant to pass the stack pointer register ($sgpr32) as soffset.
264name: function_fold_fi_mubuf_stack_relative
265tracksRegLiveness: true
266frameInfo:
267  maxAlignment:    4
268  localFrameSize:  4
269stack:
270  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
271machineFunctionInfo:
272  isEntryFunction: false
273  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
274  frameOffsetReg:  '$sgpr32'
275  stackPtrOffsetReg: '$sgpr32'
276body:             |
277  bb.0:
278
279    ; GCN-LABEL: name: function_fold_fi_mubuf_stack_relative
280    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
281    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
282    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
283    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
284    ; GCN: S_ENDPGM 0, implicit $vgpr0
285    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
286    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
287
288    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
289    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
290    $vgpr0 = COPY %2
291    S_ENDPGM 0, implicit $vgpr0
292
293...
294