1# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s 2 3# This previously would produce a bundle that could not be satisfied 4# due to using nearly the entire register budget and not considering 5# the alignment requirement of large SGPR tuples. 6 7--- 8name: soft_clause_bundle_out_of_registers 9tracksRegLiveness: true 10machineFunctionInfo: 11 isEntryFunction: true 12 waveLimiter: true 13 scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' 14 stackPtrOffsetReg: '$sgpr32' 15 occupancy: 6 16body: | 17 ; CHECK-LABEL: name: soft_clause_bundle_out_of_registers 18 ; CHECK: bb.0: 19 ; CHECK: successors: %bb.1(0x80000000) 20 ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load (s512), align 4, addrspace 4) 21 ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load (s512), align 4, addrspace 4) 22 ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load (s512), align 4, addrspace 4) 23 ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load (s512), align 4, addrspace 4) 24 ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM4:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load (s512), align 4, addrspace 4) 25 ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM5:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load (s512), align 4, addrspace 4) 26 ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM6:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load (s512), align 4, addrspace 4) 27 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef $sgpr4_sgpr5, 0, csr_amdgpu_highregs, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr70, implicit-def $sgpr80, implicit-def $sgpr90, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 28 bb.0: 29 liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6 30 31 %0:sgpr_64 = COPY $sgpr4_sgpr5 32 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), addrspace 4) 33 %2:vreg_64 = IMPLICIT_DEF 34 35 bb.1: 36 undef %3.sub0:sreg_64 = S_ADD_U32 %1.sub0, 0, implicit-def $scc 37 %3.sub1:sreg_64 = S_ADDC_U32 %1.sub1, 0, implicit-def dead $scc, implicit killed $scc 38 %4:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load (s512), align 4, addrspace 4) 39 %5:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load (s512), align 4, addrspace 4) 40 %6:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load (s512), align 4, addrspace 4) 41 %7:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load (s512), align 4, addrspace 4) 42 %8:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load (s512), align 4, addrspace 4) 43 %9:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load (s512), align 4, addrspace 4) 44 %10:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load (s512), align 4, addrspace 4) 45 dead $sgpr30_sgpr31 = SI_CALL undef $sgpr4_sgpr5, 0, csr_amdgpu_highregs, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr70, implicit-def $sgpr80, implicit-def $sgpr90, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 46 dead %11:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub1, 0, 0, implicit $mode, implicit $exec 47 dead %12:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub2, 0, 0, implicit $mode, implicit $exec 48 dead %13:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub3, 0, 0, implicit $mode, implicit $exec 49 dead %14:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub4, 0, 0, implicit $mode, implicit $exec 50 dead %15:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub5, 0, 0, implicit $mode, implicit $exec 51 dead %16:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub6, 0, 0, implicit $mode, implicit $exec 52 dead %17:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub7, 0, 0, implicit $mode, implicit $exec 53 dead %18:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub8, 0, 0, implicit $mode, implicit $exec 54 dead %19:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub9, 0, 0, implicit $mode, implicit $exec 55 dead %20:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub10, 0, 0, implicit $mode, implicit $exec 56 dead %21:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub11, 0, 0, implicit $mode, implicit $exec 57 dead %22:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub12, 0, 0, implicit $mode, implicit $exec 58 dead %23:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub13, 0, 0, implicit $mode, implicit $exec 59 dead %24:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub14, 0, 0, implicit $mode, implicit $exec 60 dead %25:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub15, 0, 0, implicit $mode, implicit $exec 61 dead %26:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub0, 0, 0, implicit $mode, implicit $exec 62 dead %27:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub1, 0, 0, implicit $mode, implicit $exec 63 dead %28:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub2, 0, 0, implicit $mode, implicit $exec 64 dead %29:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub3, 0, 0, implicit $mode, implicit $exec 65 dead %30:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub4, 0, 0, implicit $mode, implicit $exec 66 dead %31:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub5, 0, 0, implicit $mode, implicit $exec 67 dead %32:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub6, 0, 0, implicit $mode, implicit $exec 68 dead %33:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub7, 0, 0, implicit $mode, implicit $exec 69 dead %34:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub8, 0, 0, implicit $mode, implicit $exec 70 dead %35:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub9, 0, 0, implicit $mode, implicit $exec 71 dead %36:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub10, 0, 0, implicit $mode, implicit $exec 72 dead %37:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub11, 0, 0, implicit $mode, implicit $exec 73 dead %38:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub12, 0, 0, implicit $mode, implicit $exec 74 dead %39:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub13, 0, 0, implicit $mode, implicit $exec 75 dead %40:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub14, 0, 0, implicit $mode, implicit $exec 76 dead %41:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub15, 0, 0, implicit $mode, implicit $exec 77 dead %42:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub0, 0, 0, implicit $mode, implicit $exec 78 dead %43:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub1, 0, 0, implicit $mode, implicit $exec 79 dead %44:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub2, 0, 0, implicit $mode, implicit $exec 80 dead %45:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub3, 0, 0, implicit $mode, implicit $exec 81 dead %46:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub4, 0, 0, implicit $mode, implicit $exec 82 dead %47:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub5, 0, 0, implicit $mode, implicit $exec 83 dead %48:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub8, 0, 0, implicit $mode, implicit $exec 84 dead %49:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub9, 0, 0, implicit $mode, implicit $exec 85 dead %50:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub10, 0, 0, implicit $mode, implicit $exec 86 dead %51:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub11, 0, 0, implicit $mode, implicit $exec 87 dead %52:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub12, 0, 0, implicit $mode, implicit $exec 88 dead %53:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub14, 0, 0, implicit $mode, implicit $exec 89 dead %54:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub15, 0, 0, implicit $mode, implicit $exec 90 dead %55:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %9.sub10, 0, 0, implicit $mode, implicit $exec 91 dead %56:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %6.sub0, 0, 0, implicit $mode, implicit $exec 92 dead %57:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %7.sub10, 0, 0, implicit $mode, implicit $exec 93 dead %58:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %10.sub0, 0, 0, implicit $mode, implicit $exec 94 S_CMP_LG_U32 0, 0, implicit-def $scc 95 S_CBRANCH_SCC1 %bb.1, implicit killed $scc 96 S_BRANCH %bb.2 97 98 bb.2: 99 S_ENDPGM 0 100 101... 102 103# Case with simple clause which exceeds the pressure limit, though 104# didn't hit the register allocator error. 105 106--- 107name: simple_huge_reg_tuple_clause 108tracksRegLiveness: true 109machineFunctionInfo: 110 isEntryFunction: true 111 waveLimiter: false 112 memoryBound: false 113 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' 114 stackPtrOffsetReg: '$sgpr32' 115 occupancy: 10 116 117body: | 118 bb.0: 119 liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6 120 ; CHECK-LABEL: name: simple_huge_reg_tuple_clause 121 ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load (s512), align 4, addrspace 4) 122 ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load (s512), align 4, addrspace 4) 123 ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load (s512), align 4, addrspace 4) 124 ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load (s512), align 4, addrspace 4) 125 ; CHECK-NEXT: S_NOP 0, implicit [[S_LOAD_DWORDX16_IMM]] 126 %0:sreg_64 = COPY $sgpr4_sgpr5 127 %1:sreg_64 = S_MOV_B64 0 128 %2:sreg_64 = S_MOV_B64 1 129 %3:sreg_64 = S_MOV_B64 2 130 %4:sreg_64 = S_MOV_B64 3 131 %5:sreg_64 = S_MOV_B64 4 132 %6:sreg_64 = S_MOV_B64 5 133 %7:sreg_64 = S_MOV_B64 6 134 %8:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load (s512), align 4, addrspace 4) 135 %9:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load (s512), align 4, addrspace 4) 136 %10:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load (s512), align 4, addrspace 4) 137 %11:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load (s512), align 4, addrspace 4) 138 S_NOP 0, implicit %8 139 S_NOP 0, implicit %9 140 S_NOP 0, implicit %10 141 S_NOP 0, implicit %11 142 S_NOP 0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5 143 S_NOP 0, implicit %6 144 S_NOP 0, implicit %7 145 S_ENDPGM 0 146 147... 148