1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s 4 5define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 6 ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 7 ; UNPACKED: bb.1 (%ir-block.0): 8 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 9 ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 10 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 11 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 12 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 13 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 14 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 15 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 16 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 17 ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] 18 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 19 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 20 ; PACKED: bb.1 (%ir-block.0): 21 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 22 ; PACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 23 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 24 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 25 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 26 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 27 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 28 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 29 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 30 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] 31 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 32 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 33 ret half %val 34} 35 36define amdgpu_ps <2 x half> @raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 37 ; UNPACKED-LABEL: name: raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 38 ; UNPACKED: bb.1 (%ir-block.0): 39 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 40 ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 41 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 42 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 43 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 44 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 45 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 46 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 47 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4) 48 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 49 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 50 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 51 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 52 ; UNPACKED: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY8]], implicit $exec 53 ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 54 ; UNPACKED: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec 55 ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 56 ; UNPACKED: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] 57 ; UNPACKED: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY10]], [[V_AND_B32_e64_1]], implicit $exec 58 ; UNPACKED: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec 59 ; UNPACKED: $vgpr0 = COPY [[V_OR_B32_e64_]] 60 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 61 ; PACKED-LABEL: name: raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 62 ; PACKED: bb.1 (%ir-block.0): 63 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 64 ; PACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 65 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 66 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 67 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 68 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 69 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 70 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 71 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4) 72 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN]] 73 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 74 %val = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 75 ret <2 x half> %val 76} 77 78; FIXME: Crashes 79; define amdgpu_ps <3 x half> @raw_tbuffer_load_v3f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 80; %val = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 81; ret <3 x half> %val 82; } 83 84define amdgpu_ps <4 x half> @raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 85 ; UNPACKED-LABEL: name: raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 86 ; UNPACKED: bb.1 (%ir-block.0): 87 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 88 ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 89 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 90 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 91 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 92 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 93 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 94 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 95 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "TargetCustom7", align 1, addrspace 4) 96 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 97 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 98 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 99 ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3 100 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 101 ; UNPACKED: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 102 ; UNPACKED: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY10]], implicit $exec 103 ; UNPACKED: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 104 ; UNPACKED: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec 105 ; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16 106 ; UNPACKED: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] 107 ; UNPACKED: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY12]], [[V_AND_B32_e64_1]], implicit $exec 108 ; UNPACKED: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec 109 ; UNPACKED: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 110 ; UNPACKED: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY13]], implicit $exec 111 ; UNPACKED: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] 112 ; UNPACKED: [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec 113 ; UNPACKED: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] 114 ; UNPACKED: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[V_AND_B32_e64_3]], implicit $exec 115 ; UNPACKED: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec 116 ; UNPACKED: $vgpr0 = COPY [[V_OR_B32_e64_]] 117 ; UNPACKED: $vgpr1 = COPY [[V_OR_B32_e64_1]] 118 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 119 ; PACKED-LABEL: name: raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset 120 ; PACKED: bb.1 (%ir-block.0): 121 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 122 ; PACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 123 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 124 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 125 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 126 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 127 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 128 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 129 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "TargetCustom7", align 1, addrspace 4) 130 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 131 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 132 ; PACKED: $vgpr0 = COPY [[COPY6]] 133 ; PACKED: $vgpr1 = COPY [[COPY7]] 134 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 135 %val = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 136 ret <4 x half> %val 137} 138 139define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) { 140 ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset 141 ; UNPACKED: bb.1 (%ir-block.0): 142 ; UNPACKED: successors: %bb.2(0x80000000) 143 ; UNPACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 144 ; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 145 ; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 146 ; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 147 ; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 148 ; UNPACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 149 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 150 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 151 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] 152 ; UNPACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 153 ; UNPACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 154 ; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 155 ; UNPACKED: bb.2: 156 ; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) 157 ; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 158 ; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 159 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 160 ; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec 161 ; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec 162 ; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec 163 ; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 164 ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec 165 ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 166 ; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 167 ; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 168 ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec 169 ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc 170 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 171 ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 172 ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 173 ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec 174 ; UNPACKED: bb.3: 175 ; UNPACKED: successors: %bb.4(0x80000000) 176 ; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 177 ; UNPACKED: bb.4: 178 ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] 179 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 180 ; PACKED-LABEL: name: raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset 181 ; PACKED: bb.1 (%ir-block.0): 182 ; PACKED: successors: %bb.2(0x80000000) 183 ; PACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 184 ; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 185 ; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 186 ; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 187 ; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 188 ; PACKED: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 189 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 190 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 191 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] 192 ; PACKED: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 193 ; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 194 ; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec 195 ; PACKED: bb.2: 196 ; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000) 197 ; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec 198 ; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec 199 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 200 ; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec 201 ; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec 202 ; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec 203 ; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 204 ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec 205 ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc 206 ; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 207 ; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 208 ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec 209 ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc 210 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 211 ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 212 ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 213 ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec 214 ; PACKED: bb.3: 215 ; PACKED: successors: %bb.4(0x80000000) 216 ; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]] 217 ; PACKED: bb.4: 218 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] 219 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 220 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 221 ret half %val 222} 223 224define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 225 ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc 226 ; UNPACKED: bb.1 (%ir-block.0): 227 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 228 ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 229 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 230 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 231 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 232 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 233 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 234 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 235 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 236 ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] 237 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 238 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc 239 ; PACKED: bb.1 (%ir-block.0): 240 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 241 ; PACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 242 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 243 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 244 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 245 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 246 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 247 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 248 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 249 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] 250 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 251 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) 252 ret half %val 253} 254 255define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 256 ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc 257 ; UNPACKED: bb.1 (%ir-block.0): 258 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 259 ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 260 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 261 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 262 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 263 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 264 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 265 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 266 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 267 ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] 268 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 269 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc 270 ; PACKED: bb.1 (%ir-block.0): 271 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 272 ; PACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 273 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 274 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 275 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 276 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 277 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 278 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 279 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 280 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] 281 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 282 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) 283 ret half %val 284} 285 286define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 287 ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc 288 ; UNPACKED: bb.1 (%ir-block.0): 289 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 290 ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 291 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 292 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 293 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 294 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 295 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 296 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 297 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 298 ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] 299 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 300 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc 301 ; PACKED: bb.1 (%ir-block.0): 302 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 303 ; PACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 304 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 305 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 306 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 307 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 308 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 309 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 310 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 311 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] 312 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 313 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) 314 ret half %val 315} 316 317define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 318 ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc 319 ; UNPACKED: bb.1 (%ir-block.0): 320 ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 321 ; UNPACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 322 ; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 323 ; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 324 ; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 325 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 326 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 327 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 328 ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 329 ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] 330 ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 331 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc 332 ; PACKED: bb.1 (%ir-block.0): 333 ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 334 ; PACKED: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 335 ; PACKED: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 336 ; PACKED: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 337 ; PACKED: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 338 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 339 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 340 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 341 ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4) 342 ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] 343 ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 344 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) 345 ret half %val 346} 347 348declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0 349declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0 350declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0 351declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0 352 353attributes #0 = { nounwind readonly } 354