1# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -verify-machineinstrs %s -o - | FileCheck -check-prefixes=FULL,ALL %s
2# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefixes=SIMPLE,ALL %s
3
4
5---
# kernel0 sets most machineFunctionInfo fields explicitly in its input.
# Both runs are expected to print the explicitly-set values back. The run
# without -simplify-mir is additionally expected to print fields that are
# absent from the input (dynLDSAlign, the hasSpilled* flags, the mode block,
# highBitsOf32BitAddress and occupancy). The -simplify-mir run is expected to
# omit noSignedZerosFPMath, which the input sets to false.
6# ALL-LABEL: name: kernel0
7# FULL: machineFunctionInfo:
8# FULL-NEXT: explicitKernArgSize: 128
9# FULL-NEXT: maxKernArgAlign: 64
10# FULL-NEXT: ldsSize: 2048
11# FULL-NEXT: dynLDSAlign: 1
12# FULL-NEXT: isEntryFunction: true
13# FULL-NEXT: noSignedZerosFPMath: false
14# FULL-NEXT: memoryBound:     true
15# FULL-NEXT: waveLimiter:     true
16# FULL-NEXT: hasSpilledSGPRs: false
17# FULL-NEXT: hasSpilledVGPRs: false
18# FULL-NEXT: scratchRSrcReg:  '$sgpr8_sgpr9_sgpr10_sgpr11'
19# FULL-NEXT: frameOffsetReg:  '$sgpr12'
20# FULL-NEXT: stackPtrOffsetReg:  '$sgpr13'
21# FULL-NEXT: argumentInfo:
22# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
23# FULL-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
24# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
25# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
26# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
27# FULL-NEXT: mode:
28# FULL-NEXT: ieee: true
29# FULL-NEXT: dx10-clamp: true
30# FULL-NEXT: fp32-input-denormals: true
31# FULL-NEXT: fp32-output-denormals: true
32# FULL-NEXT: fp64-fp16-input-denormals: true
33# FULL-NEXT: fp64-fp16-output-denormals: true
34# FULL-NEXT:  highBitsOf32BitAddress: 0
35# FULL-NEXT:  occupancy: 10
36# FULL-NEXT: body:
37
38# SIMPLE: machineFunctionInfo:
39# SIMPLE-NEXT: explicitKernArgSize: 128
40# SIMPLE-NEXT: maxKernArgAlign: 64
41# SIMPLE-NEXT: ldsSize: 2048
42# SIMPLE-NEXT: isEntryFunction: true
43# SIMPLE-NEXT: memoryBound: true
44# SIMPLE-NEXT: waveLimiter: true
45# SIMPLE-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
46# SIMPLE-NEXT: frameOffsetReg:  '$sgpr12'
47# SIMPLE-NEXT: stackPtrOffsetReg:  '$sgpr13'
48# SIMPLE-NEXT: argumentInfo:
49# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
50# SIMPLE-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
51# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
52# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
53# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
54# SIMPLE-NEXT: occupancy: 10
55# SIMPLE-NEXT: body:
56name: kernel0
57machineFunctionInfo:
58  explicitKernArgSize: 128
59  maxKernArgAlign: 64
60  ldsSize: 2048
61  isEntryFunction: true
62  noSignedZerosFPMath: false
63  memoryBound:     true
64  waveLimiter:     true
65  scratchRSrcReg:  '$sgpr8_sgpr9_sgpr10_sgpr11'
66  frameOffsetReg: '$sgpr12'
67  stackPtrOffsetReg:  '$sgpr13'
68  argumentInfo:
69    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
70    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
71    workGroupIDX:    { reg: '$sgpr6' }
72    privateSegmentWaveByteOffset: { reg: '$sgpr7' }
73    workItemIDX:     { reg: '$vgpr0' }
74body:             |
75  bb.0:
76    S_ENDPGM 0
77
78...
79
80# FIXME: Should be able to omit the machineFunctionInfo section entirely in the simplified (-simplify-mir) output.
81---
# no_mfi has no machineFunctionInfo key in its input at all.
# The run without -simplify-mir is expected to print a fully populated
# section. The -simplify-mir run is expected to print only maxKernArgAlign,
# argumentInfo (with privateSegmentBuffer) and occupancy.
82# ALL-LABEL: name: no_mfi
83# FULL: machineFunctionInfo:
84# FULL-NEXT: explicitKernArgSize: 0
85# FULL-NEXT: maxKernArgAlign: 1
86# FULL-NEXT: ldsSize: 0
87# FULL-NEXT: dynLDSAlign: 1
88# FULL-NEXT: isEntryFunction: false
89# FULL-NEXT: noSignedZerosFPMath: false
90# FULL-NEXT: memoryBound:     false
91# FULL-NEXT: waveLimiter:     false
92# FULL-NEXT: hasSpilledSGPRs: false
93# FULL-NEXT: hasSpilledVGPRs: false
94# FULL-NEXT: scratchRSrcReg:  '$private_rsrc_reg'
95# FULL-NEXT: frameOffsetReg:  '$fp_reg'
96# FULL-NEXT: stackPtrOffsetReg:  '$sp_reg'
97# FULL-NEXT: argumentInfo:
98# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
99# FULL-NEXT: mode:
100# FULL-NEXT: ieee: true
101# FULL-NEXT: dx10-clamp: true
102# FULL-NEXT: fp32-input-denormals: true
103# FULL-NEXT: fp32-output-denormals: true
104# FULL-NEXT: fp64-fp16-input-denormals: true
105# FULL-NEXT: fp64-fp16-output-denormals: true
106# FULL-NEXT:  highBitsOf32BitAddress: 0
107# FULL-NEXT:  occupancy: 10
108# FULL-NEXT: body:
109
110# SIMPLE: machineFunctionInfo:
111# SIMPLE-NEXT: maxKernArgAlign: 1
112# SIMPLE-NEXT: argumentInfo:
113# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
114# SIMPLE-NEXT:  occupancy: 10
115# SIMPLE-NEXT: body:
116
117name: no_mfi
118body:             |
119  bb.0:
120    S_ENDPGM 0
121
122...
123
124---
# empty_mfi has a machineFunctionInfo key with no value in its input.
# The run without -simplify-mir is expected to print a fully populated
# section. The -simplify-mir run is expected to print only maxKernArgAlign,
# argumentInfo (with privateSegmentBuffer) and occupancy.
125# ALL-LABEL: name: empty_mfi
126# FULL: machineFunctionInfo:
127# FULL-NEXT: explicitKernArgSize: 0
128# FULL-NEXT: maxKernArgAlign: 1
129# FULL-NEXT: ldsSize: 0
130# FULL-NEXT: dynLDSAlign: 1
131# FULL-NEXT: isEntryFunction: false
132# FULL-NEXT: noSignedZerosFPMath: false
133# FULL-NEXT: memoryBound:     false
134# FULL-NEXT: waveLimiter:     false
135# FULL-NEXT: hasSpilledSGPRs: false
136# FULL-NEXT: hasSpilledVGPRs: false
137# FULL-NEXT: scratchRSrcReg:  '$private_rsrc_reg'
138# FULL-NEXT: frameOffsetReg:  '$fp_reg'
139# FULL-NEXT: stackPtrOffsetReg:  '$sp_reg'
140# FULL-NEXT: argumentInfo:
141# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
142# FULL-NEXT: mode:
143# FULL-NEXT: ieee: true
144# FULL-NEXT: dx10-clamp: true
145# FULL-NEXT: fp32-input-denormals: true
146# FULL-NEXT: fp32-output-denormals: true
147# FULL-NEXT: fp64-fp16-input-denormals: true
148# FULL-NEXT: fp64-fp16-output-denormals: true
149# FULL-NEXT:  highBitsOf32BitAddress: 0
150# FULL-NEXT:  occupancy: 10
151# FULL-NEXT: body:
152
153# SIMPLE: machineFunctionInfo:
154# SIMPLE-NEXT: maxKernArgAlign: 1
155# SIMPLE-NEXT: argumentInfo:
156# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
157# SIMPLE-NEXT:  occupancy: 10
158# SIMPLE-NEXT: body:
159
160name: empty_mfi
161machineFunctionInfo:
162body:             |
163  bb.0:
164    S_ENDPGM 0
165
166...
167
168---
# empty_mfi_entry_func sets only isEntryFunction: true in its input.
# Both runs are expected to print isEntryFunction: true. The run without
# -simplify-mir is expected to print every other field as well, while the
# -simplify-mir run is expected to add only maxKernArgAlign, argumentInfo
# (with privateSegmentBuffer) and occupancy.
169# ALL-LABEL: name: empty_mfi_entry_func
170# FULL: machineFunctionInfo:
171# FULL-NEXT: explicitKernArgSize: 0
172# FULL-NEXT: maxKernArgAlign: 1
173# FULL-NEXT: ldsSize: 0
174# FULL-NEXT: dynLDSAlign: 1
175# FULL-NEXT: isEntryFunction: true
176# FULL-NEXT: noSignedZerosFPMath: false
177# FULL-NEXT: memoryBound:     false
178# FULL-NEXT: waveLimiter:     false
179# FULL-NEXT: hasSpilledSGPRs: false
180# FULL-NEXT: hasSpilledVGPRs: false
181# FULL-NEXT: scratchRSrcReg:  '$private_rsrc_reg'
182# FULL-NEXT: frameOffsetReg:  '$fp_reg'
183# FULL-NEXT: stackPtrOffsetReg:  '$sp_reg'
184# FULL-NEXT: argumentInfo:
185# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
186# FULL-NEXT: mode:
187# FULL-NEXT: ieee: true
188# FULL-NEXT: dx10-clamp: true
189# FULL-NEXT: fp32-input-denormals: true
190# FULL-NEXT: fp32-output-denormals: true
191# FULL-NEXT: fp64-fp16-input-denormals: true
192# FULL-NEXT: fp64-fp16-output-denormals: true
193# FULL-NEXT:  highBitsOf32BitAddress: 0
194# FULL-NEXT:  occupancy: 10
195# FULL-NEXT: body:
196
197# SIMPLE: machineFunctionInfo:
198# SIMPLE-NEXT: maxKernArgAlign: 1
199# SIMPLE-NEXT: isEntryFunction: true
200# SIMPLE-NEXT: argumentInfo:
201# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
202# SIMPLE-NEXT: occupancy: 10
203# SIMPLE-NEXT: body:
204
205name: empty_mfi_entry_func
206machineFunctionInfo:
207  isEntryFunction: true
208body:             |
209  bb.0:
210    S_ENDPGM 0
211
212...
213
214---
# default_regs_mfi sets only scratchRSrcReg, to '$private_rsrc_reg'.
# The run without -simplify-mir is expected to print all three register
# fields. The -simplify-mir run is expected to print neither scratchRSrcReg
# nor stackPtrOffsetReg.
215# ALL-LABEL: name: default_regs_mfi
216
217# FULL: scratchRSrcReg:  '$private_rsrc_reg'
218# FULL-NEXT: frameOffsetReg:  '$fp_reg'
219# FULL-NEXT: stackPtrOffsetReg:  '$sp_reg'
220
221# SIMPLE-NOT: scratchRSrcReg
222# SIMPLE-NOT: stackPtrOffsetReg
223name: default_regs_mfi
224machineFunctionInfo:
225  scratchRSrcReg:  '$private_rsrc_reg'
226
227body:             |
228  bb.0:
229    S_ENDPGM 0
230
231...
232
233---
# fake_stack_arginfo gives flatScratchInit a stack offset instead of a
# register, and gives workItemIDY a register plus a mask.
# The mask is written as hexadecimal 0xff00 in the input and is expected to
# be printed as decimal 65280. privateSegmentBuffer is not in the input but
# is expected in the output of both runs.
234# ALL-LABEL: name: fake_stack_arginfo
235
236# FULL: argumentInfo:
237# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
238# FULL-NEXT: flatScratchInit: { offset: 4 }
239# FULL-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
240
241# SIMPLE: argumentInfo:
242# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
243# SIMPLE-NEXT: flatScratchInit: { offset: 4 }
244# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
245name: fake_stack_arginfo
246machineFunctionInfo:
247  argumentInfo:
248    flatScratchInit: { offset: 4 }
249    workItemIDY:     { reg: '$vgpr0' , mask: 0xff00 }
250
251body:             |
252  bb.0:
253    S_ENDPGM 0
254
255...
256
257---
# parse_mode sets all six fields of the mode block to false.
# Both runs are expected to print the mode block with every field false.
258# ALL-LABEL: name: parse_mode
259# ALL: mode:
260# ALL-NEXT: ieee: false
261# ALL-NEXT: dx10-clamp: false
262# ALL-NEXT: fp32-input-denormals: false
263# ALL-NEXT: fp32-output-denormals: false
264# ALL-NEXT: fp64-fp16-input-denormals: false
265# ALL-NEXT: fp64-fp16-output-denormals: false
266
267name: parse_mode
268machineFunctionInfo:
269  mode:
270    ieee: false
271    dx10-clamp: false
272    fp32-input-denormals: false
273    fp32-output-denormals: false
274    fp64-fp16-input-denormals: false
275    fp64-fp16-output-denormals: false
276
277body:             |
278  bb.0:
279    S_ENDPGM 0
280
281...
282
283
284---
# parse_spilled_regs sets hasSpilledSGPRs and hasSpilledVGPRs to true.
# Both runs are expected to print the two flags as true on adjacent lines.
285# ALL-LABEL: name: parse_spilled_regs
286# ALL: machineFunctionInfo:
287# ALL: hasSpilledSGPRs: true
288# ALL-NEXT: hasSpilledVGPRs: true
289
290name: parse_spilled_regs
291machineFunctionInfo:
292  hasSpilledSGPRs: true
293  hasSpilledVGPRs: true
294
295body:             |
296  bb.0:
297    S_ENDPGM 0
298
299...
300
301---
# dyn_lds_with_alignment sets only dynLDSAlign: 8.
# Both runs are expected to print dynLDSAlign: 8. The run without
# -simplify-mir also expects ldsSize: 0 on the line immediately before it.
302# ALL-LABEL: name: dyn_lds_with_alignment
303
304# FULL: ldsSize: 0
305# FULL-NEXT: dynLDSAlign: 8
306
307# SIMPLE: dynLDSAlign: 8
308name: dyn_lds_with_alignment
309machineFunctionInfo:
310  dynLDSAlign: 8
311
312body:             |
313  bb.0:
314    S_ENDPGM 0
315
316...
317
318---
# occupancy_0 sets occupancy: 0 in its input, but both runs are expected to
# print occupancy: 10.
# NOTE(review): presumably a parsed occupancy of 0 is treated as "not
# specified" and replaced by a computed value; confirm against the parser.
319# ALL-LABEL: name: occupancy_0
320# ALL: occupancy: 10
321name: occupancy_0
322machineFunctionInfo:
323  occupancy: 0
324
325body:             |
326  bb.0:
327    S_ENDPGM 0
328
329...
330
331---
# occupancy_3 sets occupancy: 3 in its input.
# Both runs are expected to print the same value back unchanged.
332# ALL-LABEL: name: occupancy_3
333# ALL: occupancy: 3
334name: occupancy_3
335machineFunctionInfo:
336  occupancy: 3
337
338body:             |
339  bb.0:
340    S_ENDPGM 0
341
342...
343
344---
# scavenge_fi declares one spill-slot stack object (id 0) and points
# scavengeFI at it as '%stack.0'.
# Both runs are expected to print the same scavengeFI reference back.
345# ALL-LABEL: name: scavenge_fi
346# ALL: scavengeFI: '%stack.0'
347name: scavenge_fi
348stack:
349  - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
350machineFunctionInfo:
351  scavengeFI: '%stack.0'
352
353body:             |
354  bb.0:
355    S_ENDPGM 0
356
357...
358