1; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
3; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
4
; sin(x) and cos(x) on the same operand: kept as-is post-link, fused into a
; single sincos call when prelinking, or mapped to native_sin/native_cos when
; native calls are enabled.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
; GCN-POSTLINK: tail call fast float @_Z3sinf(
; GCN-POSTLINK: tail call fast float @_Z3cosf(
; GCN-PRELINK: call fast float @_Z6sincosfPf(
; GCN-NATIVE: tail call fast float @_Z10native_sinf(
; GCN-NATIVE: tail call fast float @_Z10native_cosf(
define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3sinf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  %call2 = tail call fast float @_Z3cosf(float %tmp)
  %arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  store float %call2, float addrspace(1)* %arrayidx3, align 4
  ret void
}

declare float @_Z3sinf(float)

declare float @_Z3cosf(float)
25
; <2 x float> form of the sin+cos fusion test.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2
; GCN-POSTLINK: tail call fast <2 x float> @_Z3sinDv2_f(
; GCN-POSTLINK: tail call fast <2 x float> @_Z3cosDv2_f(
; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_(
; GCN-NATIVE: tail call fast <2 x float> @_Z10native_sinDv2_f(
; GCN-NATIVE: tail call fast <2 x float> @_Z10native_cosDv2_f(
define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8
  %call = tail call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
  store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8
  %call2 = tail call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1
  store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8
  ret void
}

declare <2 x float> @_Z3sinDv2_f(<2 x float>)

declare <2 x float> @_Z3cosDv2_f(<2 x float>)
46
; <3 x float> form of the sin+cos fusion test. The loads/stores go through a
; <4 x float> bitcast + shufflevector, matching how OpenCL float3 values are
; laid out in memory (as float4).
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3
; GCN-POSTLINK: tail call fast <3 x float> @_Z3sinDv3_f(
; GCN-POSTLINK: tail call fast <3 x float> @_Z3cosDv3_f(
; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_(
; GCN-NATIVE: tail call fast <3 x float> @_Z10native_sinDv3_f(
; GCN-NATIVE: tail call fast <3 x float> @_Z10native_cosDv3_f(
define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) {
entry:
  %castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)*
  %loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16
  %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %call = tail call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
  %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16
  %call11 = tail call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
  %arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1
  %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)*
  store <4 x float> %extractVec13, <4 x float> addrspace(1)* %storetmp14, align 16
  ret void
}

declare <3 x float> @_Z3sinDv3_f(<3 x float>)

declare <3 x float> @_Z3cosDv3_f(<3 x float>)
72
; <4 x float> form of the sin+cos fusion test.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4
; GCN-POSTLINK: tail call fast <4 x float> @_Z3sinDv4_f(
; GCN-POSTLINK: tail call fast <4 x float> @_Z3cosDv4_f(
; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_(
; GCN-NATIVE: tail call fast <4 x float> @_Z10native_sinDv4_f(
; GCN-NATIVE: tail call fast <4 x float> @_Z10native_cosDv4_f(
define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
  %call = tail call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
  store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16
  %call2 = tail call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1
  store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16
  ret void
}

declare <4 x float> @_Z3sinDv4_f(<4 x float>)

declare <4 x float> @_Z3cosDv4_f(<4 x float>)
93
; <8 x float> form of the sin+cos fusion test.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8
; GCN-POSTLINK: tail call fast <8 x float> @_Z3sinDv8_f(
; GCN-POSTLINK: tail call fast <8 x float> @_Z3cosDv8_f(
; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_(
; GCN-NATIVE: tail call fast <8 x float> @_Z10native_sinDv8_f(
; GCN-NATIVE: tail call fast <8 x float> @_Z10native_cosDv8_f(
define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32
  %call = tail call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
  store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32
  %call2 = tail call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1
  store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32
  ret void
}

declare <8 x float> @_Z3sinDv8_f(<8 x float>)

declare <8 x float> @_Z3cosDv8_f(<8 x float>)
114
; <16 x float> form of the sin+cos fusion test.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16
; GCN-POSTLINK: tail call fast <16 x float> @_Z3sinDv16_f(
; GCN-POSTLINK: tail call fast <16 x float> @_Z3cosDv16_f(
; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_(
; GCN-NATIVE: tail call fast <16 x float> @_Z10native_sinDv16_f(
; GCN-NATIVE: tail call fast <16 x float> @_Z10native_cosDv16_f(
define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64
  %call = tail call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
  store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64
  %call2 = tail call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1
  store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64
  ret void
}

declare <16 x float> @_Z3sinDv16_f(<16 x float>)

declare <16 x float> @_Z3cosDv16_f(<16 x float>)
135
; native_recip/half_recip of a constant fold to the constant result
; (1/3 == 0x3FD5555560000000), and native_divide/half_divide by a constant
; become a multiply by its reciprocal.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_recip
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = tail call fast float @_Z12native_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z12native_recipf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_recip
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = tail call fast float @_Z10half_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z10half_recipf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
; GCN: fmul fast float %tmp, 0x3FD5555560000000
define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z13native_divideff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
; GCN: fmul fast float %tmp, 0x3FD5555560000000
define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z11half_divideff(float, float)
181
; pow(x, c) with special constant exponents: c=0 -> 1.0, c=1 -> x,
; c=2 -> x*x, c=-1 -> 1/x, c=0.5 -> sqrt(x) (prelink only),
; c=-0.5 -> rsqrt(x) (prelink only).
; NOTE(review): the *_0i/_1i/_2i/_m1i variants below are byte-identical to the
; *_0f/_1f/_2f/_m1f ones (same @_Z3powff call with a float constant);
; presumably they were meant to exercise pown with an i32 exponent — verify
; and regenerate the CHECK lines if so.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0f
; GCN: store float 1.000000e+00, float addrspace(1)* %a
define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3powff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0i
; GCN: store float 1.000000e+00, float addrspace(1)* %a
define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1f
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_pow_1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1i
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_pow_1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2f
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2i
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1f
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1i
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
297
; pow/powr/pown with a small integer exponent (11) are expanded into a
; multiply chain by repeated squaring: x2 = x*x, x4 = x2*x2, x5 = x2*x,
; result = (x4*x4) * x5. All three entry points share the same expansion.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4powrff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z4pownfi(float %tmp, i32 11)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4pownfi(float, i32)
346
; Generic pow/powr/pown expansion when prelinking:
;   pow(x, y)  -> exp2(y * log2(|x|)) with the sign bit of x copied onto the
;                 result (pow with a non-integer constant exponent);
;   powr(x, y) -> exp2(y * log2(x)), no sign fixup (x assumed non-negative);
;   pown(x, n) -> exp2(n * log2(|x|)) with the result sign taken from x only
;                 when n is odd (n << 31 masks the sign for even n).
; With -amdgpu-use-native, powr uses native_log2/native_exp2 instead.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
; GCN-POSTLINK: tail call fast float @_Z4powrff(float %tmp, float %tmp1)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4
; GCN-NATIVE:  %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE:  %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE:  %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE:  store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
; GCN-POSTLINK: tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
; GCN-PRELINK: %conv = fptosi float %tmp1 to i32
; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %__yeven = shl i32 %conv, 31
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %conv = fptosi float %tmp1 to i32
  %call = tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
412
; rootn(x, n) with special constant n: n=1 -> x, n=2 -> sqrt(x) (prelink),
; n=3 -> cbrt(x) (prelink), n=-1 -> 1/x, n=-2 -> rsqrt(x) (prelink).
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_rootn_1(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5rootnfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 2)
; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 3)
; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp)
define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 3)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m1
; GCN: fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
; GCN-PRELINK: %__rootn2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
469
; fma/mad identity folds (valid under fast-math): a zero multiplicand reduces
; the call to the addend, a unit multiplicand reduces it to an fadd, and a
; zero addend reduces it to an fmul.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3fmafff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3madfff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y
; GCN: %fmaadd = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy
; GCN: %fmaadd = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0
; GCN: %fmamul = fmul fast float %tmp1, %tmp
define amdgpu_kernel void @test_fma_xy0(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %tmp1 = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}
545
; With -amdgpu-use-native, exp/exp2/exp10/log/log2/log10 are replaced by
; their native_* counterparts.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp
; GCN-NATIVE: tail call fast float @_Z10native_expf(float %tmp)
define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3expf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3expf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2
; GCN-NATIVE: tail call fast float @_Z11native_exp2f(float %tmp)
define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4exp2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4exp2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10
; GCN-NATIVE: tail call fast float @_Z12native_exp10f(float %tmp)
define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5exp10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5exp10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log
; GCN-NATIVE: tail call fast float @_Z10native_logf(float %tmp)
define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3logf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3logf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2
; GCN-NATIVE: tail call fast float @_Z11native_log2f(float %tmp)
define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4log2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4log2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10
; GCN-NATIVE: tail call fast float @_Z12native_log10f(float %tmp)
define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5log10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5log10f(float)
617
; With -amdgpu-use-native: powr is expanded via native_log2/native_exp2,
; and f32 sqrt maps to native_sqrt. The f64 case checks the negative: f64
; sqrt must NOT be converted to a native call even under fast-math.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
; GCN-NATIVE: tail call fast float @_Z11native_sqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4sqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
; GCN: tail call fast double @_Z4sqrtd(double %tmp)
define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) {
entry:
  %tmp = load double, double addrspace(1)* %a, align 8
  %call = tail call fast double @_Z4sqrtd(double %tmp)
  store double %call, double addrspace(1)* %a, align 8
  ret void
}

declare float @_Z4sqrtf(float)
declare double @_Z4sqrtd(double)
656
; With -amdgpu-use-native: rsqrt and tan map to native_rsqrt/native_tan,
; and a sincos call is split into separate native_sin and native_cos calls.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
; GCN-NATIVE: tail call fast float @_Z12native_rsqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rsqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5rsqrtf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan
; GCN-NATIVE: tail call fast float @_Z10native_tanf(float %tmp)
define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3tanf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3tanf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos
; GCN-NATIVE: tail call float @_Z10native_sinf(float %tmp)
; GCN-NATIVE: tail call float @_Z10native_cosf(float %tmp)
define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float*
  %call = tail call fast float @_Z6sincosfPf(float %tmp, float* %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z6sincosfPf(float, float*)
695
; Opaque handle types used by the OpenCL 2.0 pipe builtins below.
%opencl.pipe_t = type opaque
%opencl.reserve_id_t = type opaque

; Under the GCN-PRELINK run line, the generic byte-oriented pipe builtins
; (which take an i8* plus runtime size/alignment of 4) are specialized to the
; size-suffixed variants __read_pipe_2_4 / __read_pipe_4_4 taking a typed i32
; pointer, and the nounwind attribute group number is captured as NOUNWIND for
; reuse by later checks.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND:[0-9]+]]
; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  ; Non-reserved read: pipe, dst, element size 4, alignment 4.
  %tmp2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  ; Reserved read: reserve 2 elements, read at index 2, then commit.
  %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4)
  %tmp4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4)
  ret void
}

declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32)

declare %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32)

declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32)

declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32)
720
; Write-side counterpart of test_read_pipe: under the GCN-PRELINK run line the
; generic __write_pipe_2 / __write_pipe_4 builtins are specialized to the
; 4-byte typed variants, reusing the NOUNWIND attribute group captured earlier.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  ; Non-reserved write: pipe, src, element size 4, alignment 4.
  %tmp2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  ; Reserved write: reserve 2 elements, write at index 2, then commit.
  %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0
  %tmp4 = tail call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0
  ret void
}

declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32) local_unnamed_addr

declare %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr

declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32) local_unnamed_addr

declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32) local_unnamed_addr
742
; 400-byte aggregate used to exercise the "no specialization" fallback below.
%struct.S = type { [100 x i32] }

; Size-driven specialization matrix: under the GCN-PRELINK run line each
; power-of-two element size from 1 through 128 bytes maps __read_pipe_2 to the
; corresponding size-suffixed variant with a typed pointer, while the final
; 400-byte (%struct.S) transfer has no specialized variant and must stay on
; the generic i8* builtin with explicit size/alignment operands.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[$NOUNWIND]]
define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 {
entry:
  ; 1-byte element.
  %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8*
  %tmp1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0
  ; 2-byte element.
  %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)*
  %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8*
  %tmp4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0
  ; 4-byte element.
  %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)*
  %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8*
  %tmp7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0
  ; 8-byte element.
  %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)*
  %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8*
  %tmp10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0
  ; 16-byte element.
  %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)*
  %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8*
  %tmp13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0
  ; 32-byte element.
  %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)*
  %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8*
  %tmp16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0
  ; 64-byte element.
  %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)*
  %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8*
  %tmp19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0
  ; 128-byte element.
  %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)*
  %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8*
  %tmp22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0
  ; 400-byte struct: not a power of two <= 128, so no specialized variant.
  %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)*
  %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8*
  %tmp25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0
  ret void
}
785
; GCN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind }
787attributes #0 = { nounwind }
788