1; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
2
3
; Both incoming values of %phi.ptr are GEPs into the same %alloca, chosen by a
; branch. The directives below expect every pointer in the function to be
; rewritten to addrspace(3) and to index into the
; @branch_ptr_var_same_alloca.alloca global.
; Unnamed values are matched with a regex rather than a literal number so the
; test does not depend on how many temporaries the pass happens to emit.
4; CHECK-LABEL: @branch_ptr_var_same_alloca(
5; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @branch_ptr_var_same_alloca.alloca, i32 0, i32 %{{[0-9]+}}
6
7; CHECK: if:
8; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
9
10; CHECK: else:
11; CHECK: %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b
12
13; CHECK: endif:
14; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
15; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
16define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
17entry:
18  %alloca = alloca [64 x i32], align 4
19  br i1 undef, label %if, label %else
20
21if:
22  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
23  br label %endif
24
25else:
26  %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %b
27  br label %endif
28
29endif:
  ; The phi merges two pointers that both originate from %alloca.
30  %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
31  store i32 0, i32* %phi.ptr, align 4
32  ret void
33}
34
; The phi merges a GEP into %alloca (from %if) with a null pointer (from
; %entry). The directive below expects the phi to be retyped to addrspace(3)
; with the null operand kept in the second position.
35; CHECK-LABEL: @branch_ptr_phi_alloca_null_0(
36; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
37define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
38entry:
39  %alloca = alloca [64 x i32], align 4
40  br i1 undef, label %if, label %endif
41
42if:
43  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
44  br label %endif
45
46endif:
  ; Second incoming value is a null constant rather than a pointer into %alloca.
47  %phi.ptr = phi i32* [ %arrayidx0, %if ], [ null, %entry ]
48  store i32 0, i32* %phi.ptr, align 4
49  ret void
50}
51
; Same shape as @branch_ptr_phi_alloca_null_0, but with the phi operands
; swapped: null (from %entry) comes first and the GEP into %alloca (from %if)
; second. The directive below expects the phi to be retyped to addrspace(3).
; NOTE(review): the doubled space after `addrspace(3)*` in the directive should
; be harmless because FileCheck is not invoked with --strict-whitespace.
52; CHECK-LABEL: @branch_ptr_phi_alloca_null_1(
53; CHECK: %phi.ptr = phi i32 addrspace(3)*  [ null, %entry ], [ %arrayidx0, %if ]
54define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
55entry:
56  %alloca = alloca [64 x i32], align 4
57  br i1 undef, label %if, label %endif
58
59if:
60  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
61  br label %endif
62
63endif:
  ; First incoming value is a null constant rather than a pointer into %alloca.
64  %phi.ptr = phi i32* [ null, %entry ], [ %arrayidx0, %if ]
65  store i32 0, i32* %phi.ptr, align 4
66  ret void
67}
68
; A phi with a single incoming value that is a GEP into %alloca. The
; directives below expect the GEP, the phi and the store to be rewritten to
; addrspace(3), indexing into the @one_phi_value.alloca global.
; The unnamed index value is matched with a regex rather than a literal number
; so the test does not depend on how many temporaries the pass happens to emit.
69; CHECK-LABEL: @one_phi_value(
70; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @one_phi_value.alloca, i32 0, i32 %{{[0-9]+}}
71; CHECK:  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
72
73; CHECK: br label %exit
74; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %entry ]
75; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
76define amdgpu_kernel void @one_phi_value(i32 %a) #0 {
77entry:
78  %alloca = alloca [64 x i32], align 4
79  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
80  br label %exit
81
82exit:
  ; Single-operand phi: the only predecessor is %entry.
83  %phi.ptr = phi i32* [ %arrayidx0, %entry ]
84  store i32 0, i32* %phi.ptr, align 4
85  ret void
86}
87
; Negative case: one phi input is a GEP into %alloca, the other is the result
; of a call to @get_unknown_pointer. The directives below expect the function
; to be left as written: the alloca stays and all pointers keep their original
; (non-addrspace(3)) types.
88; CHECK-LABEL: @branch_ptr_alloca_unknown_obj(
89; CHECK: %alloca = alloca [64 x i32], align 4
90
91; CHECK: if:
92; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
93
94; CHECK: else:
95; CHECK: %arrayidx1 = call i32* @get_unknown_pointer()
96
97; CHECK: endif:
98; CHECK: %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
99; CHECK: store i32 0, i32* %phi.ptr, align 4
100define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
101entry:
102  %alloca = alloca [64 x i32], align 4
103  br i1 undef, label %if, label %else
104
105if:
106  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
107  br label %endif
108
109else:
  ; Pointer that does not come from %alloca.
110  %arrayidx1 = call i32* @get_unknown_pointer()
111  br label %endif
112
113endif:
114  %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
115  store i32 0, i32* %phi.ptr, align 4
116  ret void
117}
118
119; kernel void ptr_induction_var_same_alloca(void)
120; {
121;     int alloca[64];
122;     int i = 0;
123
124;     #pragma nounroll
125;     for (int* p = &alloca[2], *e = &alloca[48]; p != e; ++p, ++i)
126;     {
127;         *p = i;
128;     }
129; }
130
131; FIXME: This should be promotable. We need to use
132; GetUnderlyingObjects when looking at the icmp user.
133
; Loop whose induction variable %p.08 is a pointer walking from &alloca[2]
; to &alloca[48]; the exit test compares two pointers that both derive from
; %alloca. The directives below record the current behaviour: the alloca and
; the i32* phi are left unchanged.
134; CHECK-LABEL: @ptr_induction_var_same_alloca(
135; CHECK: %alloca = alloca [64 x i32], align 4
136; CHECK: phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
137define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 {
138entry:
139  %alloca = alloca [64 x i32], align 4
  ; Loop start (&alloca[2]) and loop end (&alloca[48]) pointers.
140  %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
141  %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 48
142  br label %for.body
143
144for.cond.cleanup:                                 ; preds = %for.body
145  ret void
146
147for.body:                                         ; preds = %for.body, %entry
148  %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
149  %p.08 = phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
150  store i32 %i.09, i32* %p.08, align 4
151  %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
152  %inc = add nuw nsw i32 %i.09, 1
  ; Pointer comparison between the advanced pointer and the end pointer.
153  %cmp = icmp eq i32* %incdec.ptr, %arrayidx1
154  br i1 %cmp, label %for.cond.cleanup, label %for.body
155}
156
157
158; extern int* get_unknown_pointer(void);
159
160; kernel void ptr_induction_var_alloca_unknown(void)
161; {
162;     int alloca[64];
163;     int i = 0;
164;
165;     for (int* p = &alloca[2], *e = get_unknown_pointer(); p != e; ++p, ++i)
166;     {
167;         *p = i;
168;     }
169; }
170
; Negative case: the pointer induction variable starts at &alloca[2], but the
; loop bound %call comes from @get_unknown_pointer. The directives below expect
; the alloca, the i32* phi and the pointer compare against %call to be left
; unchanged.
171; CHECK-LABEL: @ptr_induction_var_alloca_unknown(
172; CHECK: %alloca = alloca [64 x i32], align 4
173; CHECK: %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
174; CHECK: %cmp = icmp eq i32* %incdec.ptr, %call
175define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 {
176entry:
177  %alloca = alloca [64 x i32], align 4
178  %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
  ; NOTE(review): attribute group #2 is not defined in the visible part of this
  ; file (only #0 is) -- confirm whether this reference is intentional.
179  %call = tail call i32* @get_unknown_pointer() #2
  ; Skip the loop entirely when the start pointer already equals the bound.
180  %cmp.7 = icmp eq i32* %arrayidx, %call
181  br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader
182
183for.body.preheader:                               ; preds = %entry
184  br label %for.body
185
186for.cond.cleanup.loopexit:                        ; preds = %for.body
187  br label %for.cond.cleanup
188
189for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
190  ret void
191
192for.body:                                         ; preds = %for.body, %for.body.preheader
193  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
194  %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
195  store i32 %i.09, i32* %p.08, align 4
196  %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
197  %inc = add nuw nsw i32 %i.09, 1
198  %cmp = icmp eq i32* %incdec.ptr, %call
199  br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
200}
201
; External function with no body in this file; the tests call it to obtain an
; i32* that does not come from any alloca.
202declare i32* @get_unknown_pointer() #0
203
; Attribute group shared by every kernel in this file and by the declaration
; above: nounwind plus the string attribute "amdgpu-waves-per-eu"="1,1".
204attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" }
205