# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s

# Test what happens when an SGPR is unavailable for the unused add
# carry out when materializing the frame index.


# There are truly no free SGPRs, so the entire frame index expansion
# needs to be inverted to restore the original frame register.

---
name: scavenge_sgpr_pei_no_sgprs
tracksRegLiveness: true

# Two 4-byte stack objects, each requesting 8192-byte alignment.
stack:
  - { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
  - { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }

# Non-entry function; $sgpr33 is the frame offset register and $sgpr32 the
# stack pointer offset register.
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
  frameOffsetReg: $sgpr33
  stackPtrOffsetReg: $sgpr32

body: |
  bb.0:
    liveins: $vgpr1

    ; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
    ; CHECK: liveins: $vgpr1, $vgpr2
    ; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
    ; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
    ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
    ; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; CHECK: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
    ; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
    ; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
    ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
    ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
    ; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
    ; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
    ; CHECK: $vgpr3 = COPY killed $sgpr33
    ; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
    ; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
    ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
    ; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
    ; CHECK: $sgpr33 = V_READLANE_B32 $vgpr2, 0
    ; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
    ; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
    ; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
    ; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; CHECK: S_ENDPGM 0, implicit $vcc

    ; The S_NOP defines $sgpr4 through $sgpr31 and $vcc; the V_OR_B32 reads
    ; $sgpr4 through $sgpr31 and S_ENDPGM reads $vcc, so all of them are live
    ; across the two frame index uses (%stack.0 and %stack.1).
    S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
    $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
    $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
    S_ENDPGM 0, implicit $vcc
...

# One 32-bit SGPR is available for the intermediate scale computation,
# so only an extra copy to VALU is necessary.
# NOTE(review): in the checks for scavenge_sgpr_pei_one_sgpr the one register
# the S_NOP leaves undefined ($sgpr29) is taken by the frame-setup COPY of
# $sgpr33, and the frame index is still computed in $sgpr33 and then undone
# with the inverse S_ADD_I32 / S_LSHL_B32 -- confirm that the description
# above still matches the current output.
---
name: scavenge_sgpr_pei_one_sgpr
tracksRegLiveness: true

# Two 4-byte stack objects, each requesting 8192-byte alignment.
stack:
  - { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
  - { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }

# Non-entry function; $sgpr33 is the frame offset register and $sgpr32 the
# stack pointer offset register.
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
  frameOffsetReg: $sgpr33
  stackPtrOffsetReg: $sgpr32

body: |
  bb.0:
    liveins: $vgpr1

    ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr
    ; CHECK: liveins: $sgpr29, $vgpr1
    ; CHECK: $sgpr29 = frame-setup COPY $sgpr33
    ; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
    ; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
    ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
    ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
    ; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
    ; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
    ; CHECK: $vgpr2 = COPY killed $sgpr33
    ; CHECK: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
    ; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
    ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
    ; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
    ; CHECK: $sgpr33 = frame-destroy COPY $sgpr29
    ; CHECK: S_ENDPGM 0, implicit $vcc

    ; The S_NOP defines $sgpr4 through $sgpr28, $sgpr30, $sgpr31 and $vcc;
    ; $sgpr29 is the only register in that range it leaves undefined.
    S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
    $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
    $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
    S_ENDPGM 0, implicit $vcc
...

# When only one 64-bit SGPR is available for the unused carry out pre gfx9,
# we must reuse one of the 32-bit SGPR sub-regs to materialize the offset.
---
name: scavenge_sgpr_pei_one_sgpr_64
tracksRegLiveness: true

# Two 4-byte stack objects, each requesting 8192-byte alignment.
stack:
  - { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
  - { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }

# Non-entry function; $sgpr33 is the frame offset register and $sgpr32 the
# stack pointer offset register.
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
  frameOffsetReg: $sgpr33
  stackPtrOffsetReg: $sgpr32

body: |
  bb.0:
    liveins: $vgpr1

    ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64
    ; CHECK: liveins: $sgpr28, $vgpr1
    ; CHECK: $sgpr28 = frame-setup COPY $sgpr33
    ; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
    ; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
    ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
    ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
    ; CHECK: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
    ; CHECK: $sgpr29 = S_ADD_I32 killed $sgpr29, 8192, implicit-def $scc
    ; CHECK: $vgpr2 = COPY killed $sgpr29
    ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
    ; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
    ; CHECK: $sgpr33 = frame-destroy COPY $sgpr28
    ; CHECK: S_ENDPGM 0, implicit $vcc

    ; The S_NOP defines $sgpr4 through $sgpr27, $sgpr30, $sgpr31 and $vcc;
    ; $sgpr28 and $sgpr29 are the registers in that range it leaves undefined.
    S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
    $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
    $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
    S_ENDPGM 0, implicit $vcc
...

# Prefer to use vcc as unused carry out.
---
name: scavenge_sgpr_pei_prefer_vcc
tracksRegLiveness: true

# Two 4-byte stack objects, each requesting 8192-byte alignment.
stack:
  - { id: 0, type: default, offset: 0, size: 4, alignment: 8192 }
  - { id: 1, type: default, offset: 0, size: 4, alignment: 8192 }

# Non-entry function; $sgpr33 is the frame offset register and $sgpr32 the
# stack pointer offset register.
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
  frameOffsetReg: $sgpr33
  stackPtrOffsetReg: $sgpr32

body: |
  bb.0:
    liveins: $vgpr1

    ; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc
    ; CHECK: liveins: $sgpr28, $vgpr1
    ; CHECK: $sgpr28 = frame-setup COPY $sgpr33
    ; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
    ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
    ; CHECK: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc
    ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31
    ; CHECK: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
    ; CHECK: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
    ; CHECK: $vcc_lo = S_MOV_B32 8192
    ; CHECK: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
    ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
    ; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def $scc
    ; CHECK: $sgpr33 = frame-destroy COPY $sgpr28
    ; CHECK: S_ENDPGM 0

    ; Unlike the other functions in this file, the S_NOP here does not define
    ; $vcc and S_ENDPGM does not read it, so $vcc is not live in the body.
    S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31
    $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
    $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
    S_ENDPGM 0
...