// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=ALL,X86 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn -cl-ext=+__opencl_c_program_scope_global_variables | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s

// Exercises how struct-typed arguments and return values are passed in the
// emitted LLVM IR for the i686, amdgcn and spir triples used in the run lines
// above.

typedef int int2 __attribute__((ext_vector_type(2)));

// Aggregates of increasing size. The checks below expect the small ones to be
// coerced to direct values on amdgcn and the large ones to be passed
// indirectly (sret/byval).
typedef struct {
  int cells[9];
} Mat3X3;

typedef struct {
  int cells[16];
} Mat4X4;

typedef struct {
  int cells[1024];
} Mat32X32;

typedef struct {
  int cells[4096];
} Mat64X64;

struct StructOneMember {
  int2 x;
};

struct StructTwoMember {
  int2 x;
  int2 y;
};

struct LargeStructOneMember {
  int2 x[100];
};

struct LargeStructTwoMember {
  int2 x[40];
  int2 y[20];
};

// g_s is declared at program scope, so it is only available for OpenCL C 2.0,
// or for 3.0 and later when __opencl_c_program_scope_global_variables is
// defined.
#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
struct LargeStructOneMember g_s;
#endif

// X86-LABEL: define{{.*}} void @foo(%struct.Mat4X4* noalias sret(%struct.Mat4X4) align 4 %agg.result, %struct.Mat3X3* byval(%struct.Mat3X3) align 4 %in)
// AMDGCN-LABEL: define{{.*}} %struct.Mat4X4 @foo([9 x i32] %in.coerce)
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
  Mat4X4 out;
  return out;
}

// ALL-LABEL: define {{.*}} void @ker
// Expect two mem copies: one for the argument "in", and one for
// the return value.
// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*

// AMDGCN: load [9 x i32], [9 x i32] addrspace(1)*
// AMDGCN: call %struct.Mat4X4 @foo([9 x i32]
// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
  out[0] = foo(in[1]);
}

// X86-LABEL: define{{.*}} void @foo_large(%struct.Mat64X64* noalias sret(%struct.Mat64X64) align 4 %agg.result, %struct.Mat32X32* byval(%struct.Mat32X32) align 4 %in)
// AMDGCN-LABEL: define{{.*}} void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret(%struct.Mat64X64) align 4 %agg.result, %struct.Mat32X32 addrspace(5)* byval(%struct.Mat32X32) align 4 %in)
Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
  Mat64X64 out;
  return out;
}

// ALL-LABEL: define {{.*}} void @ker_large
// Expect two mem copies: one for the argument "in", and one for
// the return value.
// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
// AMDGCN: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
  out[0] = foo_large(in[1]);
}

// AMDGCN-LABEL: define{{.*}} void @FuncOneMember(<2 x i32> %u.coerce)
void FuncOneMember(struct StructOneMember u) {
  u.x = (int2)(0, 0);
}

// AMDGCN-LABEL: define{{.*}} void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %u)
// AMDGCN-NOT: addrspacecast
// AMDGCN:   store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)*
void FuncOneLargeMember(struct LargeStructOneMember u) {
  u.x[0] = (int2)(0, 0);
}

// AMDGCN20-LABEL: define{{.*}} void @test_indirect_arg_globl()
// AMDGCN20:  %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN20:  %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
// AMDGCN20:  call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false)
// AMDGCN20:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
void test_indirect_arg_globl(void) {
  FuncOneLargeMember(g_s);
}
#endif

// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @test_indirect_arg_local()
// AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
// AMDGCN: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(3)* align 8 bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i1 false)
// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
kernel void test_indirect_arg_local(void) {
  local struct LargeStructOneMember l_s;
  FuncOneLargeMember(l_s);
}

// AMDGCN-LABEL: define{{.*}} void @test_indirect_arg_private()
// AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN-NOT: @llvm.memcpy
// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[p_s]])
void test_indirect_arg_private(void) {
  struct LargeStructOneMember p_s;
  FuncOneLargeMember(p_s);
}

// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelOneMember
// AMDGCN-SAME:  (<2 x i32> %[[u_coerce:.*]])
// AMDGCN:  %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
// AMDGCN:  %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
// AMDGCN:  store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
// AMDGCN:  call void @FuncOneMember(<2 x i32>
kernel void KernelOneMember(struct StructOneMember u) {
  FuncOneMember(u);
}

// SPIR: call void @llvm.memcpy.p0i8.p1i8.i32
// SPIR-NOT: addrspacecast
kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
  FuncOneMember(*u);
}

// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeOneMember(
// AMDGCN:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN:  store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
// AMDGCN:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[U]])
kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
  FuncOneLargeMember(u);
}

// AMDGCN-LABEL: define{{.*}} void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
void FuncTwoMember(struct StructTwoMember u) {
  u.y = (int2)(0, 0);
}

// AMDGCN-LABEL: define{{.*}} void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %u)
void FuncLargeTwoMember(struct LargeStructTwoMember u) {
  u.y[0] = (int2)(0, 0);
}

// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelTwoMember
// AMDGCN-SAME:  (%struct.StructTwoMember %[[u_coerce:.*]])
// AMDGCN:  %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
// AMDGCN: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
// AMDGCN: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
// AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
kernel void KernelTwoMember(struct StructTwoMember u) {
  FuncTwoMember(u);
}

// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeTwoMember
// AMDGCN-SAME:  (%struct.LargeStructTwoMember %[[u_coerce:.*]])
// AMDGCN:  %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
// AMDGCN:  store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
// AMDGCN:  call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %[[u]])
kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
  FuncLargeTwoMember(u);
}