# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -verify-machineinstrs %s -o - | FileCheck -check-prefixes=FULL,ALL %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefixes=SIMPLE,ALL %s

# Each document below is fed through llc with -run-pass=none and the printed
# machineFunctionInfo section is checked twice: once as printed by default and
# once as printed with -simplify-mir.

---
# kernel0: an entry function whose machineFunctionInfo spells out sizes,
# registers and argumentInfo. The default run expects every field to be
# printed, including ones the input leaves out (dynLDSAlign, hasSpilled*,
# mode, highBitsOf32BitAddress, occupancy). The -simplify-mir run expects only
# the shorter list checked further down.
# ALL-LABEL: name: kernel0
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 128
# FULL-NEXT: maxKernArgAlign: 64
# FULL-NEXT: ldsSize: 2048
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: true
# FULL-NEXT: waveLimiter: true
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# FULL-NEXT: frameOffsetReg: '$sgpr12'
# FULL-NEXT: stackPtrOffsetReg: '$sgpr13'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: explicitKernArgSize: 128
# SIMPLE-NEXT: maxKernArgAlign: 64
# SIMPLE-NEXT: ldsSize: 2048
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: memoryBound: true
# SIMPLE-NEXT: waveLimiter: true
# SIMPLE-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# SIMPLE-NEXT: frameOffsetReg: '$sgpr12'
# SIMPLE-NEXT: stackPtrOffsetReg: '$sgpr13'
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: kernel0
machineFunctionInfo:
  explicitKernArgSize: 128
  maxKernArgAlign: 64
  ldsSize: 2048
  isEntryFunction: true
  noSignedZerosFPMath: false
  memoryBound: true
  waveLimiter: true
  scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
  frameOffsetReg: '$sgpr12'
  stackPtrOffsetReg: '$sgpr13'
  argumentInfo:
    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
    workGroupIDX: { reg: '$sgpr6' }
    privateSegmentWaveByteOffset: { reg: '$sgpr7' }
    workItemIDX: { reg: '$vgpr0' }
body: |
  bb.0:
    S_ENDPGM 0

...

# FIXME: Should be able to not print section for simple
---
# no_mfi: the input has no machineFunctionInfo key at all. Both runs still
# expect a machineFunctionInfo section in the output; the default run expects
# the full set of fields, the -simplify-mir run expects only maxKernArgAlign,
# argumentInfo and occupancy.
# ALL-LABEL: name: no_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

name: no_mfi
body: |
  bb.0:
    S_ENDPGM 0

...

---
# empty_mfi: the machineFunctionInfo key is present but has no entries. The
# expected output is the same as for a function that omits the key entirely.
# ALL-LABEL: name: empty_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

name: empty_mfi
machineFunctionInfo:
body: |
  bb.0:
    S_ENDPGM 0

...
---
# empty_mfi_entry_func: only isEntryFunction is set in the input. The default
# run expects every other field to be printed with the same values as for a
# function that sets nothing; the -simplify-mir run additionally expects the
# isEntryFunction line.
# ALL-LABEL: name: empty_mfi_entry_func
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

name: empty_mfi_entry_func
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    S_ENDPGM 0

...

---
# default_regs_mfi: scratchRSrcReg is written out explicitly, but with the
# same '$private_rsrc_reg' value the default run prints for a function that
# sets no registers. The default run must print all three special registers;
# the -simplify-mir run must print none of them. The negative checks are
# bounded by the surrounding LABEL directives, so they only cover this
# function's output.
# ALL-LABEL: name: default_regs_mfi

# FULL: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'

# SIMPLE-NOT: scratchRSrcReg
# SIMPLE-NOT: frameOffsetReg
# SIMPLE-NOT: stackPtrOffsetReg
name: default_regs_mfi
machineFunctionInfo:
  scratchRSrcReg: '$private_rsrc_reg'

body: |
  bb.0:
    S_ENDPGM 0

...
---
# fake_stack_arginfo: argumentInfo entries may carry a stack offset instead of
# a register, and a register entry may carry a mask. The mask is written in
# hex in the input (0xff00) and both runs expect it printed in decimal (65280).
# ALL-LABEL: name: fake_stack_arginfo

# FULL: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: flatScratchInit: { offset: 4 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }

# SIMPLE: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: flatScratchInit: { offset: 4 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
name: fake_stack_arginfo
machineFunctionInfo:
  argumentInfo:
    flatScratchInit: { offset: 4 }
    workItemIDY: { reg: '$vgpr0' , mask: 0xff00 }

body: |
  bb.0:
    S_ENDPGM 0

...

---
# parse_mode: every mode flag is set to false in the input, and both runs
# expect the mode section to be printed with all six flags false.
# ALL-LABEL: name: parse_mode
# ALL: mode:
# ALL-NEXT: ieee: false
# ALL-NEXT: dx10-clamp: false
# ALL-NEXT: fp32-input-denormals: false
# ALL-NEXT: fp32-output-denormals: false
# ALL-NEXT: fp64-fp16-input-denormals: false
# ALL-NEXT: fp64-fp16-output-denormals: false

name: parse_mode
machineFunctionInfo:
  mode:
    ieee: false
    dx10-clamp: false
    fp32-input-denormals: false
    fp32-output-denormals: false
    fp64-fp16-input-denormals: false
    fp64-fp16-output-denormals: false

body: |
  bb.0:
    S_ENDPGM 0

...


---
# parse_spilled_regs: both spill flags are set to true in the input, and both
# runs expect them printed back as true on adjacent lines.
# ALL-LABEL: name: parse_spilled_regs
# ALL: machineFunctionInfo:
# ALL: hasSpilledSGPRs: true
# ALL-NEXT: hasSpilledVGPRs: true

name: parse_spilled_regs
machineFunctionInfo:
  hasSpilledSGPRs: true
  hasSpilledVGPRs: true

body: |
  bb.0:
    S_ENDPGM 0

...

---
# dyn_lds_with_alignment: dynLDSAlign is set to 8 with no ldsSize given. The
# default run expects "ldsSize: 0" immediately followed by the alignment; the
# -simplify-mir run only checks for the alignment line.
# ALL-LABEL: name: dyn_lds_with_alignment

# FULL: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 8

# SIMPLE: dynLDSAlign: 8
name: dyn_lds_with_alignment
machineFunctionInfo:
  dynLDSAlign: 8

body: |
  bb.0:
    S_ENDPGM 0

...
---
# occupancy_0: the input asks for an occupancy of 0, and both runs expect the
# printed value to be 10 instead.
# ALL-LABEL: name: occupancy_0
# ALL: occupancy: 10
name: occupancy_0
machineFunctionInfo:
  occupancy: 0

body: |
  bb.0:
    S_ENDPGM 0

...

---
# occupancy_3: an occupancy of 3 in the input is expected to be printed back
# unchanged by both runs.
# ALL-LABEL: name: occupancy_3
# ALL: occupancy: 3
name: occupancy_3
machineFunctionInfo:
  occupancy: 3

body: |
  bb.0:
    S_ENDPGM 0

...

---
# scavenge_fi: scavengeFI names a stack object by reference. The function
# declares one 4-byte spill slot as %stack.0 and both runs expect the
# reference to be printed back as '%stack.0'.
# ALL-LABEL: name: scavenge_fi
# ALL: scavengeFI: '%stack.0'
name: scavenge_fi
stack:
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
machineFunctionInfo:
  scavengeFI: '%stack.0'

body: |
  bb.0:
    S_ENDPGM 0

...