// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=ALL,X86 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn -cl-ext=+__opencl_c_program_scope_global_variables | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s

// Two-element int vector; used as the member type of the struct-argument
// test cases below.
typedef int int2 __attribute__((ext_vector_type(2)));

// 9-int aggregate used as the by-value argument of foo().
typedef struct {
  int cells[9];
} Mat3X3;

// 16-int aggregate used as the by-value return type of foo().
typedef struct {
  int cells[16];
} Mat4X4;

// 1024-int aggregate used as the by-value argument of foo_large().
typedef struct {
  int cells[1024];
} Mat32X32;

// 4096-int aggregate used as the by-value return type of foo_large().
typedef struct {
  int cells[4096];
} Mat64X64;

// Struct wrapping a single int2 member.
struct StructOneMember {
  int2 x;
};

// Struct with two int2 members.
struct StructTwoMember {
  int2 x;
  int2 y;
};

// Struct with a single array member of 100 int2 elements (the memcpy checks
// in this file copy it as 800 bytes).
struct LargeStructOneMember {
  int2 x[100];
};

// Struct with two int2 array members (40 and 20 elements).
struct LargeStructTwoMember {
  int2 x[40];
  int2 y[20];
};

// Program-scope variable passed by value in test_indirect_arg_globl().
// Declared only for OpenCL C 2.0, or for 3.0 and later when the
// __opencl_c_program_scope_global_variables feature macro is defined.
#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
struct LargeStructOneMember g_s;
#endif

// foo takes a Mat3X3 by value and returns a Mat4X4 by value.
// The X86 check expects an sret result pointer plus a byval argument; the
// AMDGCN check expects the argument coerced to [9 x i32] and the result
// returned directly as %struct.Mat4X4.
// X86-LABEL: define{{.*}} void @foo(%struct.Mat4X4* noalias sret(%struct.Mat4X4) align 4 %agg.result, %struct.Mat3X3* byval(%struct.Mat3X3) align 4 %in)
// AMDGCN-LABEL: define{{.*}} %struct.Mat4X4 @foo([9 x i32] %in.coerce)
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
  // The checks only look at the signature; the contents of "out" are never
  // inspected, so it is left uninitialized.
  Mat4X4 out;
  return out;
}

// ALL-LABEL: define {{.*}} void @ker
// For X86, expect two mem copies, one for the argument "in" and one for
// the return value.
// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*

// For AMDGCN, the argument is loaded from addrspace(1) as [9 x i32] and
// passed directly, so only the returned struct is copied out with a memcpy.
// AMDGCN: load [9 x i32], [9 x i32] addrspace(1)*
// AMDGCN: call %struct.Mat4X4 @foo([9 x i32]
// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
  out[0] = foo(in[1]);
}

// foo_large takes a Mat32X32 by value and returns a Mat64X64 by value.
// Both targets are expected to use an sret result pointer and a byval
// argument; for AMDGCN both pointers are in addrspace(5).
// X86-LABEL: define{{.*}} void @foo_large(%struct.Mat64X64* noalias sret(%struct.Mat64X64) align 4 %agg.result, %struct.Mat32X32* byval(%struct.Mat32X32) align 4 %in)
// AMDGCN-LABEL: define{{.*}} void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret(%struct.Mat64X64) align 4 %agg.result, %struct.Mat32X32 addrspace(5)* byval(%struct.Mat32X32) align 4 %in)
Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
  // The checks only look at the signature; the contents of "out" are never
  // inspected, so it is left uninitialized.
  Mat64X64 out;
  return out;
}

// ALL-LABEL: define {{.*}} void @ker_large
// Expect two mem copies on both targets, one for the argument "in" (out of
// addrspace(1)) and one for the return value (back into addrspace(1)).
// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
// AMDGCN: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
  out[0] = foo_large(in[1]);
}

// Non-kernel function taking a one-member struct by value; the AMDGCN check
// expects the argument to be coerced to a single <2 x i32>.
// AMDGCN-LABEL: define{{.*}} void @FuncOneMember(<2 x i32> %u.coerce)
void FuncOneMember(struct StructOneMember u) {
  u.x = (int2)(0, 0);
}

// Non-kernel function taking the large one-member struct by value; the
// AMDGCN checks expect a byval pointer in addrspace(5) and a store through
// an addrspace(5) pointer with no addrspacecast in between.
// AMDGCN-LABEL: define{{.*}} void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %u)
// AMDGCN-NOT: addrspacecast
// AMDGCN:   store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)*
void FuncOneLargeMember(struct LargeStructOneMember u) {
  u.x[0] = (int2)(0, 0);
}

// Passes the program-scope variable g_s by value. The checks expect a
// temporary alloca in addrspace(5), a memcpy of 800 bytes from @g_s in
// addrspace(1) into it, and the temporary passed as the byval argument.
// Only compiled when program-scope variables are available (same condition
// as the declaration of g_s).
// AMDGCN20-LABEL: define{{.*}} void @test_indirect_arg_globl()
// AMDGCN20:  %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN20:  %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
// AMDGCN20:  call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false)
// AMDGCN20:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
void test_indirect_arg_globl(void) {
  FuncOneLargeMember(g_s);
}
#endif

// Passes a "local" variable by value. The checks expect a temporary alloca
// in addrspace(5), a memcpy of 800 bytes from the addrspace(3) variable into
// it, and the temporary passed as the byval argument.
// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @test_indirect_arg_local()
// AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
// AMDGCN: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(3)* align 8 bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i1 false)
// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
kernel void test_indirect_arg_local(void) {
  local struct LargeStructOneMember l_s;
  FuncOneLargeMember(l_s);
}

// Passes a function-local (private) variable by value. The checks expect the
// variable's own addrspace(5) alloca to be passed as the byval argument on
// the very next line, with no memcpy into a temporary.
// AMDGCN-LABEL: define{{.*}} void @test_indirect_arg_private()
// AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN-NOT: @llvm.memcpy
// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[p_s]])
void test_indirect_arg_private(void) {
  struct LargeStructOneMember p_s;
  FuncOneLargeMember(p_s);
}

// Kernel taking a one-member struct by value. The checks expect the kernel
// argument to arrive as <2 x i32>, be stored into an addrspace(5) alloca,
// and then be forwarded to FuncOneMember as <2 x i32>.
// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelOneMember
// AMDGCN-SAME:  (<2 x i32> %[[u_coerce:.*]])
// AMDGCN:  %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
// AMDGCN:  %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
// AMDGCN:  store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
// AMDGCN:  call void @FuncOneMember(<2 x i32>
kernel void KernelOneMember(struct StructOneMember u) {
  FuncOneMember(u);
}

// Kernel passing a struct read through a global pointer by value. The SPIR
// run expects a memcpy out of addrspace(1) and no addrspacecast afterwards.
// SPIR: call void @llvm.memcpy.p0i8.p1i8.i32
// SPIR-NOT: addrspacecast
kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
  FuncOneMember(*u);
}

// Kernel taking the large one-member struct by value. The checks expect the
// coerced kernel argument to be stored into an addrspace(5) alloca, which is
// then passed as the byval argument of FuncOneLargeMember.
// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeOneMember(
// AMDGCN:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN:  store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
// AMDGCN:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[U]])
kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
  FuncOneLargeMember(u);
}

// Non-kernel function taking a two-member struct by value; the AMDGCN check
// expects the argument to be split into two <2 x i32> parameters.
// AMDGCN-LABEL: define{{.*}} void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
void FuncTwoMember(struct StructTwoMember u) {
  u.y = (int2)(0, 0);
}

// Non-kernel function taking the large two-member struct by value; the
// AMDGCN check expects a byval pointer in addrspace(5).
// AMDGCN-LABEL: define{{.*}} void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %u)
void FuncLargeTwoMember(struct LargeStructTwoMember u) {
  u.y[0] = (int2)(0, 0);
}

// Kernel taking a two-member struct by value. The checks expect the kernel
// argument to arrive as the whole struct, live in an addrspace(5) alloca,
// and be reloaded as two <2 x i32> values for the call to FuncTwoMember.
// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelTwoMember
// AMDGCN-SAME:  (%struct.StructTwoMember %[[u_coerce:.*]])
// AMDGCN:  %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
// AMDGCN: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
// AMDGCN: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
// AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
kernel void KernelTwoMember(struct StructTwoMember u) {
  FuncTwoMember(u);
}

// Kernel taking the large two-member struct by value. The checks expect the
// coerced kernel argument to be stored into an addrspace(5) alloca, which is
// then passed as the byval argument of FuncLargeTwoMember.
// AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeTwoMember
// AMDGCN-SAME:  (%struct.LargeStructTwoMember %[[u_coerce:.*]])
// AMDGCN:  %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
// AMDGCN:  store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
// AMDGCN:  call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %[[u]])
kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
  FuncLargeTwoMember(u);
}
