; Codegen test: each RUN line compiles this file with llc for the amdgcn target
; (once with -mcpu=verde, once with -mcpu=tonga), verifies the machine
; instructions, and pipes the output through FileCheck.
1;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
2;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
3
; Expectations for @test. The DAG flavour of the directive lets the sixteen
; instructions appear in any order in the output.
; Each pattern matches an image_sample or image_sample_c instruction whose first
; operand is a single register (vN) or a register range (v[N:M]), followed by an
; integer immediate. The immediates equal the bitmask of result lanes that @test
; extracts from the corresponding sample call (lane i -> bit 1 << i); single-bit
; masks pair with a single register, multi-bit masks with a register range.
; NOTE(review): the immediate is presumably the instruction's dmask/writemask
; operand -- confirm against the AMDGPU ISA docs.
; The image_sample_c expectations (5, 9, 6, 7, 11, 13) line up with the calls in
; @test whose trailing i32 argument is 6, 7, 8, 11, 12 and 13.
4;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
5;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 3
6;CHECK-DAG: image_sample {{v[0-9]+}}, 2
7;CHECK-DAG: image_sample {{v[0-9]+}}, 1
8;CHECK-DAG: image_sample {{v[0-9]+}}, 4
9;CHECK-DAG: image_sample {{v[0-9]+}}, 8
10;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 5
11;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 9
12;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 6
13;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
14;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 12
15;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 7
16;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 11
17;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 13
18;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
19;CHECK-DAG: image_sample {{v[0-9]+}}, 8
20
; @test issues sixteen @llvm.SI.sample. calls, reads a different subset of the
; four result lanes from each one, sums every value it read and exports the sum.
; Because each call has a distinct set of used lanes, each call is expected to
; produce an instruction with a distinct lane-mask immediate (lanes 3 only is
; used twice, by res4 and res16).
21define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
   ; Sixteen <4 x i32> address vectors. Each has exactly one defined lane (the
   ; others stay undef): lanes 0..3 of %a1, then of %a2, %a3 and %a4.
22   %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
23   %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
24   %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
25   %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
26   %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
27   %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
28   %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
29   %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
30   %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
31   %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
32   %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
33   %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
34   %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
35   %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
36   %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
37   %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
   ; One sample call per address vector. The <32 x i8> and <16 x i8> operands are
   ; undef (presumably the resource and sampler descriptors -- TODO confirm) and
   ; the trailing i32 simply counts from 1 to 16.
   ; NOTE(review): the trailing i32 looks like a texture-target selector; the
   ; calls with 6, 7, 8, 11, 12 and 13 are the ones expected to become
   ; image_sample_c -- confirm against the intrinsic's definition.
38   %res1 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v1,
39      <32 x i8> undef, <16 x i8> undef, i32 1)
40   %res2 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v2,
41      <32 x i8> undef, <16 x i8> undef, i32 2)
42   %res3 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v3,
43      <32 x i8> undef, <16 x i8> undef, i32 3)
44   %res4 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v4,
45      <32 x i8> undef, <16 x i8> undef, i32 4)
46   %res5 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v5,
47      <32 x i8> undef, <16 x i8> undef, i32 5)
48   %res6 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v6,
49      <32 x i8> undef, <16 x i8> undef, i32 6)
50   %res7 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v7,
51      <32 x i8> undef, <16 x i8> undef, i32 7)
52   %res8 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v8,
53      <32 x i8> undef, <16 x i8> undef, i32 8)
54   %res9 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v9,
55      <32 x i8> undef, <16 x i8> undef, i32 9)
56   %res10 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v10,
57      <32 x i8> undef, <16 x i8> undef, i32 10)
58   %res11 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v11,
59      <32 x i8> undef, <16 x i8> undef, i32 11)
60   %res12 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v12,
61      <32 x i8> undef, <16 x i8> undef, i32 12)
62   %res13 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v13,
63      <32 x i8> undef, <16 x i8> undef, i32 13)
64   %res14 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v14,
65      <32 x i8> undef, <16 x i8> undef, i32 14)
66   %res15 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v15,
67      <32 x i8> undef, <16 x i8> undef, i32 15)
68   %res16 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v16,
69      <32 x i8> undef, <16 x i8> undef, i32 16)
   ; res1..res4: exactly one lane each (0, 1, 2, 3) -> lane masks 1, 2, 4, 8.
70   %e1 = extractelement <4 x float> %res1, i32 0
71   %e2 = extractelement <4 x float> %res2, i32 1
72   %e3 = extractelement <4 x float> %res3, i32 2
73   %e4 = extractelement <4 x float> %res4, i32 3
   ; res5..res10: every pair of lanes, added together.
   ; Lanes {0,1} {0,2} {0,3} {1,2} {1,3} {2,3} -> masks 3, 5, 9, 6, 10, 12.
74   %t0 = extractelement <4 x float> %res5, i32 0
75   %t1 = extractelement <4 x float> %res5, i32 1
76   %e5 = fadd float %t0, %t1
77   %t2 = extractelement <4 x float> %res6, i32 0
78   %t3 = extractelement <4 x float> %res6, i32 2
79   %e6 = fadd float %t2, %t3
80   %t4 = extractelement <4 x float> %res7, i32 0
81   %t5 = extractelement <4 x float> %res7, i32 3
82   %e7 = fadd float %t4, %t5
83   %t6 = extractelement <4 x float> %res8, i32 1
84   %t7 = extractelement <4 x float> %res8, i32 2
85   %e8 = fadd float %t6, %t7
86   %t8 = extractelement <4 x float> %res9, i32 1
87   %t9 = extractelement <4 x float> %res9, i32 3
88   %e9 = fadd float %t8, %t9
89   %t10 = extractelement <4 x float> %res10, i32 2
90   %t11 = extractelement <4 x float> %res10, i32 3
91   %e10 = fadd float %t10, %t11
   ; res11..res14: every triple of lanes.
   ; Lanes {0,1,2} {0,1,3} {0,2,3} {1,2,3} -> masks 7, 11, 13, 14.
92   %t12 = extractelement <4 x float> %res11, i32 0
93   %t13 = extractelement <4 x float> %res11, i32 1
94   %t14 = extractelement <4 x float> %res11, i32 2
95   %t15 = fadd float %t12, %t13
96   %e11 = fadd float %t14, %t15
97   %t16 = extractelement <4 x float> %res12, i32 0
98   %t17 = extractelement <4 x float> %res12, i32 1
99   %t18 = extractelement <4 x float> %res12, i32 3
100   %t19 = fadd float %t16, %t17
101   %e12 = fadd float %t18, %t19
102   %t20 = extractelement <4 x float> %res13, i32 0
103   %t21 = extractelement <4 x float> %res13, i32 2
104   %t22 = extractelement <4 x float> %res13, i32 3
105   %t23 = fadd float %t20, %t21
106   %e13 = fadd float %t22, %t23
107   %t24 = extractelement <4 x float> %res14, i32 1
108   %t25 = extractelement <4 x float> %res14, i32 2
109   %t26 = extractelement <4 x float> %res14, i32 3
110   %t27 = fadd float %t24, %t25
111   %e14 = fadd float %t26, %t27
   ; res15: all four lanes -> mask 15.
112   %t28 = extractelement <4 x float> %res15, i32 0
113   %t29 = extractelement <4 x float> %res15, i32 1
114   %t30 = extractelement <4 x float> %res15, i32 2
115   %t31 = extractelement <4 x float> %res15, i32 3
116   %t32 = fadd float %t28, %t29
117   %t33 = fadd float %t30, %t31
118   %e15 = fadd float %t32, %t33
   ; res16: lane 3 only -> mask 8 again (the second single-register "8" check).
119   %e16 = extractelement <4 x float> %res16, i32 3
   ; Chain-add all sixteen partial values so that every extracted lane feeds the
   ; export below.
120   %s1 = fadd float %e1, %e2
121   %s2 = fadd float %s1, %e3
122   %s3 = fadd float %s2, %e4
123   %s4 = fadd float %s3, %e5
124   %s5 = fadd float %s4, %e6
125   %s6 = fadd float %s5, %e7
126   %s7 = fadd float %s6, %e8
127   %s8 = fadd float %s7, %e9
128   %s9 = fadd float %s8, %e10
129   %s10 = fadd float %s9, %e11
130   %s11 = fadd float %s10, %e12
131   %s12 = fadd float %s11, %e13
132   %s13 = fadd float %s12, %e14
133   %s14 = fadd float %s13, %e15
134   %s15 = fadd float %s14, %e16
   ; The final sum is passed as all four float operands of the export.
135   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
136   ret void
137}
138
; @v1 samples through the <1 x i32> overload of the intrinsic and exports all
; four result lanes. The two expectations below first anchor on the "v1:" label
; at the start of a line, then require an image_sample with a register-range
; destination and immediate 15 (all four lanes used).
139; CHECK: {{^}}v1:
140; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
141define void @v1(i32 %a1) #0 {
142entry:
  ; Wrap the scalar argument in a one-element address vector.
143  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
  ; Sample with undef <32 x i8> / <16 x i8> operands and a trailing i32 of 0.
144  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
  ; Read every lane of the result so none of them can be dropped.
145  %2 = extractelement <4 x float> %1, i32 0
146  %3 = extractelement <4 x float> %1, i32 1
147  %4 = extractelement <4 x float> %1, i32 2
148  %5 = extractelement <4 x float> %1, i32 3
  ; Export lanes 0..3 in order as the four float operands.
149  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %5)
150  ret void
151}
152
153
; Sample intrinsic taking a <1 x i32> address vector (used by @v1); declared
; readnone.
154declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
155
; Sample intrinsic taking a <4 x i32> address vector (used by @test); declared
; readnone. The name ends in a bare '.' with no type suffix, exactly as it is
; spelled at the call sites.
156declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
157
; Export intrinsic: five i32 operands followed by four float operands. It has no
; readnone attribute, unlike the sample intrinsics.
158declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
159
; Attribute group #0, attached to both @test and @v1: sets the string attribute
; "ShaderType" to "0".
; NOTE(review): "0" presumably selects the shader stage the backend compiles for
; -- confirm which stage it denotes.
160attributes #0 = { "ShaderType"="0" }
161