; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck -check-prefix=FAST %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck -check-prefix=GREEDY %s
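; These tests exercise register bank assignment for llvm.amdgcn.image.sample.1d
; under both -regbankselect-fast and -regbankselect-greedy: the resource and
; sampler descriptors must end up uniform (SGPR) and the coordinate in a VGPR.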

; Natural mapping
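; %rsrc and %samp are inreg (SGPR) arguments and %s arrives in a VGPR, so every
; operand is already in the bank the intrinsic expects; no copies or waterfall
; loops should be introduced.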
define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
  ; FAST-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp
  ; FAST: bb.1 (%ir-block.0):
  ; FAST:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
  ; FAST:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
  ; FAST:   [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
  ; FAST:   [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
  ; FAST:   [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
  ; FAST:   [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
  ; FAST:   [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
  ; FAST:   [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
  ; FAST:   [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
  ; FAST:   [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
  ; FAST:   [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
  ; FAST:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; FAST:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; FAST:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
  ; FAST:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; FAST:   G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
  ; FAST:   S_ENDPGM 0
  ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp
  ; GREEDY: bb.1 (%ir-block.0):
  ; GREEDY:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
  ; GREEDY:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
  ; GREEDY:   [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
  ; GREEDY:   [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
  ; GREEDY:   [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
  ; GREEDY:   [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
  ; GREEDY:   [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
  ; GREEDY:   [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
  ; GREEDY:   [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
  ; GREEDY:   [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
  ; GREEDY:   [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
  ; GREEDY:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GREEDY:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GREEDY:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
  ; GREEDY:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GREEDY:   G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
  ; GREEDY:   S_ENDPGM 0
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  store <4 x float> %v, <4 x float> addrspace(1)* undef
  ret void
}

; Copy required for VGPR input
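; %s is passed inreg here, so the coordinate starts in an SGPR. The image
; instruction only accepts the address operand in a VGPR, so regbankselect must
; insert a single SGPR-to-VGPR copy (COPY13 below) and nothing else.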
define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float inreg %s) {
  ; FAST-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp
  ; FAST: bb.1 (%ir-block.0):
  ; FAST:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14
  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
  ; FAST:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
  ; FAST:   [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
  ; FAST:   [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
  ; FAST:   [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
  ; FAST:   [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
  ; FAST:   [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
  ; FAST:   [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
  ; FAST:   [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
  ; FAST:   [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
  ; FAST:   [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14
  ; FAST:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; FAST:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; FAST:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
  ; FAST:   [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32)
  ; FAST:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; FAST:   G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
  ; FAST:   S_ENDPGM 0
  ; GREEDY-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp
  ; GREEDY: bb.1 (%ir-block.0):
  ; GREEDY:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14
  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
  ; GREEDY:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
  ; GREEDY:   [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
  ; GREEDY:   [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
  ; GREEDY:   [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
  ; GREEDY:   [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
  ; GREEDY:   [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
  ; GREEDY:   [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
  ; GREEDY:   [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
  ; GREEDY:   [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
  ; GREEDY:   [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14
  ; GREEDY:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GREEDY:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GREEDY:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
  ; GREEDY:   [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32)
  ; GREEDY:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GREEDY:   G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
  ; GREEDY:   S_ENDPGM 0
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  store <4 x float> %v, <4 x float> addrspace(1)* undef
  ret void
}

; Waterfall loop for rsrc
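; %rsrc is now a VGPR (potentially divergent) value, but the sample instruction
; needs a uniform resource descriptor. Regbankselect therefore emits a
; waterfall loop: each 64-bit half of the descriptor is read back with
; V_READFIRSTLANE_B32, the lanes holding that value are selected via
; V_CMP_EQ_U64 and S_AND_SAVEEXEC, the sample is issued for just those lanes,
; and the loop repeats until every lane has run; the original exec mask is
; restored afterwards.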
define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsrc, <4 x i32> inreg %samp, float %s) {
  ; FAST-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp
  ; FAST: bb.1 (%ir-block.0):
  ; FAST:   successors: %bb.2(0x80000000)
  ; FAST:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; FAST:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
  ; FAST:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
  ; FAST:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
  ; FAST:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
  ; FAST:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
  ; FAST:   [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
  ; FAST:   [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
  ; FAST:   [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
  ; FAST:   [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
  ; FAST:   [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
  ; FAST:   [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
  ; FAST:   [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
  ; FAST:   [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; FAST:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; FAST:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
  ; FAST:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
  ; FAST:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
  ; FAST:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
  ; FAST:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; FAST: bb.2:
  ; FAST:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; FAST:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
  ; FAST:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
  ; FAST:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
  ; FAST:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
  ; FAST:   [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
  ; FAST:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; FAST:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
  ; FAST:   [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
  ; FAST:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
  ; FAST:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
  ; FAST:   [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
  ; FAST:   [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
  ; FAST:   [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
  ; FAST:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; FAST:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; FAST:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; FAST:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; FAST: bb.3:
  ; FAST:   successors: %bb.4(0x80000000)
  ; FAST:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; FAST: bb.4:
  ; FAST:   G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
  ; FAST:   S_ENDPGM 0
  ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp
  ; GREEDY: bb.1 (%ir-block.0):
  ; GREEDY:   successors: %bb.2(0x80000000)
  ; GREEDY:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GREEDY:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
  ; GREEDY:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
  ; GREEDY:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
  ; GREEDY:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
  ; GREEDY:   [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
  ; GREEDY:   [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
  ; GREEDY:   [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
  ; GREEDY:   [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
  ; GREEDY:   [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
  ; GREEDY:   [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
  ; GREEDY:   [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
  ; GREEDY:   [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GREEDY:   [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GREEDY:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
  ; GREEDY:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
  ; GREEDY:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
  ; GREEDY:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
  ; GREEDY:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GREEDY: bb.2:
  ; GREEDY:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
  ; GREEDY:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
  ; GREEDY:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
  ; GREEDY:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GREEDY:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
  ; GREEDY:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
  ; GREEDY:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
  ; GREEDY:   [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
  ; GREEDY:   [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
  ; GREEDY:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GREEDY:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GREEDY:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GREEDY:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GREEDY: bb.3:
  ; GREEDY:   successors: %bb.4(0x80000000)
  ; GREEDY:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GREEDY: bb.4:
  ; GREEDY:   G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
  ; GREEDY:   S_ENDPGM 0
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  store <4 x float> %v, <4 x float> addrspace(1)* undef
  ret void
}

; Waterfall loop for sampler
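; Same waterfall structure as above, but only the 128-bit sampler descriptor is
; divergent here, so just its two 64-bit halves need the readfirstlane/compare
; sequence while the rsrc vector stays in SGPRs.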
define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> %samp, float %s) {
  ; FAST-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp
  ; FAST: bb.1 (%ir-block.0):
  ; FAST:   successors: %bb.2(0x80000000)
  ; FAST:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
  ; FAST:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
  ; FAST:   [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
  ; FAST:   [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
  ; FAST:   [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
  ; FAST:   [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
  ; FAST:   [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
  ; FAST:   [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
  ; FAST:   [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
  ; FAST:   [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
  ; FAST:   [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
  ; FAST:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; FAST:   [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; FAST:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
  ; FAST:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
  ; FAST:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
  ; FAST:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
  ; FAST:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; FAST: bb.2:
  ; FAST:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; FAST:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
  ; FAST:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
  ; FAST:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
  ; FAST:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
  ; FAST:   [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
  ; FAST:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; FAST:   [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
  ; FAST:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; FAST:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; FAST:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; FAST:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; FAST: bb.3:
  ; FAST:   successors: %bb.4(0x80000000)
  ; FAST:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; FAST: bb.4:
  ; FAST:   G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
  ; FAST:   S_ENDPGM 0
  ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp
  ; GREEDY: bb.1 (%ir-block.0):
  ; GREEDY:   successors: %bb.2(0x80000000)
  ; GREEDY:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
  ; GREEDY:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
  ; GREEDY:   [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
  ; GREEDY:   [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
  ; GREEDY:   [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
  ; GREEDY:   [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
  ; GREEDY:   [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
  ; GREEDY:   [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
  ; GREEDY:   [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
  ; GREEDY:   [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
  ; GREEDY:   [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
  ; GREEDY:   [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GREEDY:   [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GREEDY:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
  ; GREEDY:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
  ; GREEDY:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
  ; GREEDY:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
  ; GREEDY:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GREEDY: bb.2:
  ; GREEDY:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
  ; GREEDY:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
  ; GREEDY:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
  ; GREEDY:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GREEDY:   [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
  ; GREEDY:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GREEDY:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GREEDY:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GREEDY:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GREEDY: bb.3:
  ; GREEDY:   successors: %bb.4(0x80000000)
  ; GREEDY:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GREEDY: bb.4:
  ; GREEDY:   G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
  ; GREEDY:   S_ENDPGM 0
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  store <4 x float> %v, <4 x float> addrspace(1)* undef
  ret void
}

; Waterfall loop for rsrc and sampler
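; Both descriptors are divergent here, so a single waterfall loop uniformizes
; all four 64-bit halves of the rsrc vector and both halves of the sampler
; before the sample is issued.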
define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsrc, <4 x i32> %samp, float %s) {
  ; FAST-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp
  ; FAST: bb.1 (%ir-block.0):
  ; FAST:   successors: %bb.2(0x80000000)
  ; FAST:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
  ; FAST:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
  ; FAST:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
  ; FAST:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
  ; FAST:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
  ; FAST:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
  ; FAST:   [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
  ; FAST:   [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
  ; FAST:   [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
  ; FAST:   [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9
  ; FAST:   [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10
  ; FAST:   [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11
  ; FAST:   [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12
  ; FAST:   [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; FAST:   [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; FAST:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
  ; FAST:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
  ; FAST:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
  ; FAST:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
  ; FAST:   [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
  ; FAST:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; FAST: bb.2:
  ; FAST:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; FAST:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
  ; FAST:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
  ; FAST:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
  ; FAST:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
  ; FAST:   [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
  ; FAST:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; FAST:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
  ; FAST:   [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
  ; FAST:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
  ; FAST:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
  ; FAST:   [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
  ; FAST:   [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
  ; FAST:   [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
  ; FAST:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec
  ; FAST:   [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec
  ; FAST:   [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc
  ; FAST:   [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec
  ; FAST:   [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec
  ; FAST:   [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
  ; FAST:   [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
  ; FAST:   [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
  ; FAST:   [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
  ; FAST:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; FAST:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; FAST:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; FAST:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; FAST: bb.3:
  ; FAST:   successors: %bb.4(0x80000000)
  ; FAST:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; FAST: bb.4:
  ; FAST:   G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
  ; FAST:   S_ENDPGM 0
  ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp
  ; GREEDY: bb.1 (%ir-block.0):
  ; GREEDY:   successors: %bb.2(0x80000000)
  ; GREEDY:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
  ; GREEDY:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
  ; GREEDY:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
  ; GREEDY:   [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
  ; GREEDY:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
  ; GREEDY:   [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
  ; GREEDY:   [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
  ; GREEDY:   [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
  ; GREEDY:   [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9
  ; GREEDY:   [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10
  ; GREEDY:   [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11
  ; GREEDY:   [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12
  ; GREEDY:   [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GREEDY:   [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
  ; GREEDY:   [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
  ; GREEDY:   [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
  ; GREEDY:   [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
  ; GREEDY:   [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
  ; GREEDY:   [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
  ; GREEDY:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
  ; GREEDY: bb.2:
  ; GREEDY:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
  ; GREEDY:   [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
  ; GREEDY:   [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
  ; GREEDY:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
  ; GREEDY:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
  ; GREEDY:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
  ; GREEDY:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
  ; GREEDY:   [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
  ; GREEDY:   [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
  ; GREEDY:   [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
  ; GREEDY:   [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec
  ; GREEDY:   [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc
  ; GREEDY:   [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec
  ; GREEDY:   [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec
  ; GREEDY:   [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
  ; GREEDY:   [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
  ; GREEDY:   [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
  ; GREEDY:   [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
  ; GREEDY:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
  ; GREEDY:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GREEDY:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GREEDY:   S_CBRANCH_EXECNZ %bb.2, implicit $exec
  ; GREEDY: bb.3:
  ; GREEDY:   successors: %bb.4(0x80000000)
  ; GREEDY:   $exec = S_MOV_B64_term [[S_MOV_B64_term]]
  ; GREEDY: bb.4:
  ; GREEDY:   G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
  ; GREEDY:   S_ENDPGM 0
  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  store <4 x float> %v, <4 x float> addrspace(1)* undef
  ret void
}

declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }