; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
; RUN: opt -S -O1 -mtriple=amdgcn---amdgiz -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s

; NOTE(review): Tests the AMDGPU OpenCL library-call simplification pass.
; Three modes are checked: post-link (calls kept/folded to mangled libcalls),
; pre-link (sin+cos pairs fused to sincos, pow-family expanded inline), and
; -amdgpu-use-native (calls rewritten to native_* builtins).

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
; GCN-POSTLINK: tail call fast float @_Z3sinf(
; GCN-POSTLINK: tail call fast float @_Z3cosf(
; GCN-PRELINK: call fast float @_Z6sincosfPf(
; GCN-NATIVE: tail call fast float @_Z10native_sinf(
; GCN-NATIVE: tail call fast float @_Z10native_cosf(
define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3sinf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  %call2 = tail call fast float @_Z3cosf(float %tmp)
  %arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  store float %call2, float addrspace(1)* %arrayidx3, align 4
  ret void
}

declare float @_Z3sinf(float)

declare float @_Z3cosf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2
; GCN-POSTLINK: tail call fast <2 x float> @_Z3sinDv2_f(
; GCN-POSTLINK: tail call fast <2 x float> @_Z3cosDv2_f(
; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_(
; GCN-NATIVE: tail call fast <2 x float> @_Z10native_sinDv2_f(
; GCN-NATIVE: tail call fast <2 x float> @_Z10native_cosDv2_f(
define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8
  %call = tail call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
  store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8
  %call2 = tail call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1
  store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8
  ret void
}

declare <2 x float> @_Z3sinDv2_f(<2 x float>)

declare <2 x float> @_Z3cosDv2_f(<2 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3
; GCN-POSTLINK: tail call fast <3 x float> @_Z3sinDv3_f(
; GCN-POSTLINK: tail call fast <3 x float> @_Z3cosDv3_f(
; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_(
; GCN-NATIVE: tail call fast <3 x float> @_Z10native_sinDv3_f(
; GCN-NATIVE: tail call fast <3 x float> @_Z10native_cosDv3_f(
define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) {
entry:
  %castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)*
  %loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16
  %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %call = tail call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
  %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16
  %call11 = tail call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
  %arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1
  %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)*
  store <4 x float> %extractVec13, <4 x float> addrspace(1)* %storetmp14, align 16
  ret void
}

declare <3 x float> @_Z3sinDv3_f(<3 x float>)

declare <3 x float> @_Z3cosDv3_f(<3 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4
; GCN-POSTLINK: tail call fast <4 x float> @_Z3sinDv4_f(
; GCN-POSTLINK: tail call fast <4 x float> @_Z3cosDv4_f(
; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_(
; GCN-NATIVE: tail call fast <4 x float> @_Z10native_sinDv4_f(
; GCN-NATIVE: tail call fast <4 x float> @_Z10native_cosDv4_f(
define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
  %call = tail call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
  store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16
  %call2 = tail call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1
  store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16
  ret void
}

declare <4 x float> @_Z3sinDv4_f(<4 x float>)

declare <4 x float> @_Z3cosDv4_f(<4 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8
; GCN-POSTLINK: tail call fast <8 x float> @_Z3sinDv8_f(
; GCN-POSTLINK: tail call fast <8 x float> @_Z3cosDv8_f(
; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_(
; GCN-NATIVE: tail call fast <8 x float> @_Z10native_sinDv8_f(
; GCN-NATIVE: tail call fast <8 x float> @_Z10native_cosDv8_f(
define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32
  %call = tail call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
  store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32
  %call2 = tail call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1
  store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32
  ret void
}

declare <8 x float> @_Z3sinDv8_f(<8 x float>)

declare <8 x float> @_Z3cosDv8_f(<8 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16
; GCN-POSTLINK: tail call fast <16 x float> @_Z3sinDv16_f(
; GCN-POSTLINK: tail call fast <16 x float> @_Z3cosDv16_f(
; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_(
; GCN-NATIVE: tail call fast <16 x float> @_Z10native_sinDv16_f(
; GCN-NATIVE: tail call fast <16 x float> @_Z10native_cosDv16_f(
define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) {
entry:
  %tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64
  %call = tail call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
  store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64
  %call2 = tail call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1
  store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64
  ret void
}

declare <16 x float> @_Z3sinDv16_f(<16 x float>)

declare <16 x float> @_Z3cosDv16_f(<16 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_recip
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = tail call fast float @_Z12native_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z12native_recipf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_recip
; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a
define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) {
entry:
  %call = tail call fast float @_Z10half_recipf(float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z10half_recipf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
; GCN: fmul fast float %tmp, 0x3FD5555560000000
define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z13native_divideff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
; GCN: fmul fast float %tmp, 0x3FD5555560000000
define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z11half_divideff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0f
; GCN: store float 1.000000e+00, float addrspace(1)* %a
define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3powff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0i
; GCN: store float 1.000000e+00, float addrspace(1)* %a
define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1f
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_pow_1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1i
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_pow_1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2f
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2i
; GCN: %tmp = load float, float addrspace(1)* %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1f
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1f(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1i
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1i(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float -5.000000e-01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4powrff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z4pownfi(float %tmp, i32 11)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4pownfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3powff(float %tmp, float 1.013000e+03)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
; GCN-POSTLINK: tail call fast float @_Z4powrff(float %tmp, float %tmp1)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
; GCN-POSTLINK: tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
; GCN-PRELINK: %conv = fptosi float %tmp1 to i32
; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %__yeven = shl i32 %conv, 31
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %conv = fptosi float %tmp1 to i32
  %call = tail call fast float @_Z4pownfi(float %tmp, i32 %conv)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
; GCN: %tmp = load float, float addrspace(1)* %arrayidx, align 4
; GCN: store float %tmp, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_rootn_1(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5rootnfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 2)
; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 3)
; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp)
define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 3)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m1
; GCN: fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2
; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
; GCN-PRELINK: %__rootn2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -2)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3fmafff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3madfff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0
; GCN: store float %y, float addrspace(1)* %a
define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y
; GCN: %fmaadd = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy
; GCN: %fmaadd = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0
; GCN: %fmamul = fmul fast float %tmp1, %tmp
define amdgpu_kernel void @test_fma_xy0(float addrspace(1)* nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp = load float, float addrspace(1)* %arrayidx, align 4
  %tmp1 = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp
; GCN-NATIVE: tail call fast float @_Z10native_expf(float %tmp)
define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3expf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3expf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2
; GCN-NATIVE: tail call fast float @_Z11native_exp2f(float %tmp)
define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4exp2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4exp2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10
; GCN-NATIVE: tail call fast float @_Z12native_exp10f(float %tmp)
define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5exp10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5exp10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log
; GCN-NATIVE: tail call fast float @_Z10native_logf(float %tmp)
define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3logf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3logf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2
; GCN-NATIVE: tail call fast float @_Z11native_log2f(float %tmp)
define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4log2f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z4log2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10
; GCN-NATIVE: tail call fast float @_Z12native_log10f(float %tmp)
define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5log10f(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5log10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4
define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4
  %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
; GCN-NATIVE: tail call fast float @_Z11native_sqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z4sqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
; GCN: tail call fast double @_Z4sqrtd(double %tmp)
define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) {
entry:
  %tmp = load double, double addrspace(1)* %a, align 8
  %call = tail call fast double @_Z4sqrtd(double %tmp)
  store double %call, double addrspace(1)* %a, align 8
  ret void
}

declare float @_Z4sqrtf(float)
declare double @_Z4sqrtd(double)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
; GCN-NATIVE: tail call fast float @_Z12native_rsqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z5rsqrtf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z5rsqrtf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan
; GCN-NATIVE: tail call fast float @_Z10native_tanf(float %tmp)
define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %call = tail call fast float @_Z3tanf(float %tmp)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z3tanf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos
; GCN-NATIVE: tail call float @_Z10native_sinf(float %tmp)
; GCN-NATIVE: tail call float @_Z10native_cosf(float %tmp)
define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) {
entry:
  %tmp = load float, float addrspace(1)* %a, align 4
  %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
  %tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float*
  %call = tail call fast float @_Z6sincosfPf(float %tmp, float* %tmp1)
  store float %call, float addrspace(1)* %a, align 4
  ret void
}

declare float @_Z6sincosfPf(float, float*)

%opencl.pipe_t = type opaque
%opencl.reserve_id_t = type opaque

; At prelink time, pipe builtins whose element size/alignment are known
; constants are specialized to size-suffixed forms that take a typed pointer
; (e.g. __read_pipe_2 with size 4 -> __read_pipe_2_4 taking i32*), dropping
; the trailing size/alignment arguments.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND:[0-9]+]]
; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  %tmp2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4)
  %tmp4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4)
  ret void
}

declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32)

declare %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32)

declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32)

declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32)

; Same specialization for the write-pipe builtins.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t addrspace(5)* %{{.*}}, i32 2, i32* %{{.*}}) #[[$NOUNWIND]]
define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
entry:
  %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
  %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8*
  %tmp2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0
  %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0
  %tmp4 = tail call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0
  tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0
  ret void
}

declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8*, i32, i32) local_unnamed_addr

declare %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr

declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i8*, i32, i32) local_unnamed_addr

declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t addrspace(5)*, i32, i32) local_unnamed_addr

; 400-byte aggregate used to exercise the "no specialized variant" fallback.
%struct.S = type { [100 x i32] }

; One call per supported power-of-two element size (1..128 bytes) must map to
; the matching __read_pipe_2_<size> variant; the final 400-byte case has no
; specialized variant and must keep the generic __read_pipe_2 signature.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}} i32 400, i32 4) #[[$NOUNWIND]]
define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 {
entry:
  %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8*
  %tmp1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0
  %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)*
  %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8*
  %tmp4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0
  %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)*
  %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8*
  %tmp7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0
  %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)*
  %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8*
  %tmp10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0
  %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)*
  %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8*
  %tmp13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0
  %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)*
  %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8*
  %tmp16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0
  %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)*
  %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8*
  %tmp19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0
  %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)*
  %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8*
  %tmp22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0
  %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)*
  %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8*
  %tmp25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0
  ret void
}

; NOTE(review): "CGN-PRELINK" below looks like a typo for "GCN-PRELINK", so
; FileCheck never runs this check line (no RUN line defines a CGN prefix) —
; confirm and fix the prefix so the attribute check becomes active.
; CGN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind }
attributes #0 = { nounwind }