1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=PACKED %s
4
5define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
6  ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
7  ; UNPACKED: bb.1 (%ir-block.0):
8  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
9  ; UNPACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
10  ; UNPACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
11  ; UNPACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
12  ; UNPACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
13  ; UNPACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
14  ; UNPACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
15  ; UNPACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
16  ; UNPACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
17  ; UNPACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
18  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
19  ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
20  ; PACKED: bb.1 (%ir-block.0):
21  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
22  ; PACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
23  ; PACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
24  ; PACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
25  ; PACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
26  ; PACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
27  ; PACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
28  ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
29  ; PACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
30  ; PACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
31  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
32  %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
33  ret half %val
34}
35
36define amdgpu_ps <2 x half> @raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
37  ; UNPACKED-LABEL: name: raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
38  ; UNPACKED: bb.1 (%ir-block.0):
39  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
40  ; UNPACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
41  ; UNPACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
42  ; UNPACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
43  ; UNPACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
44  ; UNPACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
45  ; UNPACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
46  ; UNPACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
47  ; UNPACKED:   [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
48  ; UNPACKED:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0
49  ; UNPACKED:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1
50  ; UNPACKED:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
51  ; UNPACKED:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
52  ; UNPACKED:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY8]], implicit $exec
53  ; UNPACKED:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
54  ; UNPACKED:   [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY9]], implicit $exec
55  ; UNPACKED:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
56  ; UNPACKED:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
57  ; UNPACKED:   [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY10]], [[V_AND_B32_e64_1]], implicit $exec
58  ; UNPACKED:   [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec
59  ; UNPACKED:   $vgpr0 = COPY [[V_OR_B32_e64_]]
60  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
61  ; PACKED-LABEL: name: raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
62  ; PACKED: bb.1 (%ir-block.0):
63  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
64  ; PACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
65  ; PACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
66  ; PACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
67  ; PACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
68  ; PACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
69  ; PACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
70  ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
71  ; PACKED:   [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "TargetCustom7", align 1, addrspace 4)
72  ; PACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN]]
73  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
74  %val = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
75  ret <2 x half> %val
76}
77
78; FIXME: Crashes
79; define amdgpu_ps <3 x half> @raw_tbuffer_load_v3f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
80;   %val = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
81;   ret <3 x half> %val
82; }
83
84define amdgpu_ps <4 x half> @raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
85  ; UNPACKED-LABEL: name: raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
86  ; UNPACKED: bb.1 (%ir-block.0):
87  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
88  ; UNPACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
89  ; UNPACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
90  ; UNPACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
91  ; UNPACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
92  ; UNPACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
93  ; UNPACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
94  ; UNPACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
95  ; UNPACKED:   [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "TargetCustom7", align 1, addrspace 4)
96  ; UNPACKED:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
97  ; UNPACKED:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
98  ; UNPACKED:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2
99  ; UNPACKED:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub3
100  ; UNPACKED:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
101  ; UNPACKED:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
102  ; UNPACKED:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY6]], [[COPY10]], implicit $exec
103  ; UNPACKED:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
104  ; UNPACKED:   [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY7]], [[COPY11]], implicit $exec
105  ; UNPACKED:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
106  ; UNPACKED:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
107  ; UNPACKED:   [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY12]], [[V_AND_B32_e64_1]], implicit $exec
108  ; UNPACKED:   [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_]], [[V_LSHLREV_B32_e64_]], implicit $exec
109  ; UNPACKED:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
110  ; UNPACKED:   [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY8]], [[COPY13]], implicit $exec
111  ; UNPACKED:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
112  ; UNPACKED:   [[V_AND_B32_e64_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY9]], [[COPY14]], implicit $exec
113  ; UNPACKED:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
114  ; UNPACKED:   [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[V_AND_B32_e64_3]], implicit $exec
115  ; UNPACKED:   [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_AND_B32_e64_2]], [[V_LSHLREV_B32_e64_1]], implicit $exec
116  ; UNPACKED:   $vgpr0 = COPY [[V_OR_B32_e64_]]
117  ; UNPACKED:   $vgpr1 = COPY [[V_OR_B32_e64_1]]
118  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
119  ; PACKED-LABEL: name: raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
120  ; PACKED: bb.1 (%ir-block.0):
121  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
122  ; PACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
123  ; PACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
124  ; PACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
125  ; PACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
126  ; PACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
127  ; PACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
128  ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
129  ; PACKED:   [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "TargetCustom7", align 1, addrspace 4)
130  ; PACKED:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
131  ; PACKED:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
132  ; PACKED:   $vgpr0 = COPY [[COPY6]]
133  ; PACKED:   $vgpr1 = COPY [[COPY7]]
134  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
135  %val = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
136  ret <4 x half> %val
137}
138
139define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) {
140  ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset
141  ; UNPACKED: bb.1 (%ir-block.0):
142  ; UNPACKED:   successors: %bb.2(0x80000000)
143  ; UNPACKED:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
144  ; UNPACKED:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
145  ; UNPACKED:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
146  ; UNPACKED:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
147  ; UNPACKED:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
148  ; UNPACKED:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
149  ; UNPACKED:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
150  ; UNPACKED:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
151  ; UNPACKED:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
152  ; UNPACKED:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
153  ; UNPACKED:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
154  ; UNPACKED:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
155  ; UNPACKED: bb.2:
156  ; UNPACKED:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
157  ; UNPACKED:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
158  ; UNPACKED:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
159  ; UNPACKED:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
160  ; UNPACKED:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
161  ; UNPACKED:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
162  ; UNPACKED:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
163  ; UNPACKED:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
164  ; UNPACKED:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
165  ; UNPACKED:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
166  ; UNPACKED:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
167  ; UNPACKED:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
168  ; UNPACKED:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
169  ; UNPACKED:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
170  ; UNPACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
171  ; UNPACKED:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
172  ; UNPACKED:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
173  ; UNPACKED:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
174  ; UNPACKED: bb.3:
175  ; UNPACKED:   successors: %bb.4(0x80000000)
176  ; UNPACKED:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
177  ; UNPACKED: bb.4:
178  ; UNPACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
179  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
180  ; PACKED-LABEL: name: raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffset
181  ; PACKED: bb.1 (%ir-block.0):
182  ; PACKED:   successors: %bb.2(0x80000000)
183  ; PACKED:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
184  ; PACKED:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
185  ; PACKED:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
186  ; PACKED:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
187  ; PACKED:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
188  ; PACKED:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
189  ; PACKED:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
190  ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
191  ; PACKED:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
192  ; PACKED:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
193  ; PACKED:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
194  ; PACKED:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
195  ; PACKED: bb.2:
196  ; PACKED:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
197  ; PACKED:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
198  ; PACKED:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
199  ; PACKED:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
200  ; PACKED:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
201  ; PACKED:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
202  ; PACKED:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
203  ; PACKED:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
204  ; PACKED:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
205  ; PACKED:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
206  ; PACKED:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
207  ; PACKED:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
208  ; PACKED:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
209  ; PACKED:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
210  ; PACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
211  ; PACKED:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
212  ; PACKED:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
213  ; PACKED:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
214  ; PACKED: bb.3:
215  ; PACKED:   successors: %bb.4(0x80000000)
216  ; PACKED:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
217  ; PACKED: bb.4:
218  ; PACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
219  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
220  %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
221  ret half %val
222}
223
224define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
225  ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
226  ; UNPACKED: bb.1 (%ir-block.0):
227  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
228  ; UNPACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
229  ; UNPACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
230  ; UNPACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
231  ; UNPACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
232  ; UNPACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
233  ; UNPACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
234  ; UNPACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
235  ; UNPACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
236  ; UNPACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
237  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
238  ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
239  ; PACKED: bb.1 (%ir-block.0):
240  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
241  ; PACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
242  ; PACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
243  ; PACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
244  ; PACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
245  ; PACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
246  ; PACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
247  ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
248  ; PACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
249  ; PACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
250  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
251  %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
252  ret half %val
253}
254
255define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
256  ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
257  ; UNPACKED: bb.1 (%ir-block.0):
258  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
259  ; UNPACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
260  ; UNPACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
261  ; UNPACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
262  ; UNPACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
263  ; UNPACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
264  ; UNPACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
265  ; UNPACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
266  ; UNPACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
267  ; UNPACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
268  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
269  ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
270  ; PACKED: bb.1 (%ir-block.0):
271  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
272  ; PACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
273  ; PACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
274  ; PACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
275  ; PACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
276  ; PACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
277  ; PACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
278  ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
279  ; PACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
280  ; PACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
281  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
282  %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
283  ret half %val
284}
285
286define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
287  ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
288  ; UNPACKED: bb.1 (%ir-block.0):
289  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
290  ; UNPACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
291  ; UNPACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
292  ; UNPACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
293  ; UNPACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
294  ; UNPACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
295  ; UNPACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
296  ; UNPACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
297  ; UNPACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
298  ; UNPACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
299  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
300  ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
301  ; PACKED: bb.1 (%ir-block.0):
302  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
303  ; PACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
304  ; PACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
305  ; PACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
306  ; PACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
307  ; PACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
308  ; PACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
309  ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
310  ; PACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
311  ; PACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
312  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
313  %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
314  ret half %val
315}
316
317define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
318  ; UNPACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
319  ; UNPACKED: bb.1 (%ir-block.0):
320  ; UNPACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
321  ; UNPACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
322  ; UNPACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
323  ; UNPACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
324  ; UNPACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
325  ; UNPACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
326  ; UNPACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
327  ; UNPACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
328  ; UNPACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
329  ; UNPACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
330  ; UNPACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
331  ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
332  ; PACKED: bb.1 (%ir-block.0):
333  ; PACKED:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
334  ; PACKED:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
335  ; PACKED:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
336  ; PACKED:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
337  ; PACKED:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
338  ; PACKED:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
339  ; PACKED:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
340  ; PACKED:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
341  ; PACKED:   [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 2 from custom "TargetCustom7", align 1, addrspace 4)
342  ; PACKED:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
343  ; PACKED:   SI_RETURN_TO_EPILOG implicit $vgpr0
344  %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
345  ret half %val
346}
347
348declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
349declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
350declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
351declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
352
353attributes #0 = { nounwind readonly }
354