1 // Test host code gen
2 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
3 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
4 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
5 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
6 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
7 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
8
9 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
10 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
11 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
12 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
13 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
14 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
15 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
16
17 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
18 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
19 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
20 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
21 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
22 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
23
24 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
25 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
26 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
27 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
28 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
29 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
30 // SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
31 // expected-no-diagnostics
32 #ifndef HEADER
33 #define HEADER
34
35
// Template driver that instantiates '#pragma omp distribute parallel for'
// under 'target'/'teams' with every schedule / dist_schedule clause
// combination covered by this test. The emitted host IR is pattern-checked
// by the CHECK/LAMBDA FileCheck prefixes attached to main() below.
// NOTE(review): this is a compile-only codegen test (-emit-llvm piped to
// FileCheck); the code is presumably never executed, which is why the
// pointers below are left uninitialized — confirm against the RUN lines.
template <typename T>
T tmain() {
  T *a, *b, *c;   // deliberately uninitialized: only IR generation is checked
  int n = 10000;  // loop trip count
  int ch = 100;   // runtime chunk size for the chunked-schedule variants

  // no schedule clauses
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for
  for (int i = 0; i < n; ++i) {
    // 'cancel for' inside the combined construct exercises cancellation codegen
#pragma omp cancel for
    a[i] = b[i] + c[i];
  }

  // dist_schedule: static no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for dist_schedule(static)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // dist_schedule: static chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for dist_schedule(static, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: static no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(static)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: static chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(static, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: dynamic no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(dynamic)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: dynamic chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(dynamic, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  return T(); // return value exists only to give the template a valid body
}
101
main()102 int main() {
103 double *a, *b, *c;
104 int n = 10000;
105 int ch = 100;
106
107 #ifdef LAMBDA
108 // LAMBDA-LABEL: @main
109 // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
110 [&]() {
111 // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
112
113 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
114 // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]](
115
116 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
117 // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]](
118
119 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
120 // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]](
121
122 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
123 // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]](
124
125 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
126 // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]](
127
128 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
129 // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]](
130
131 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
132 // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]](
133
134 // no schedule clauses
135 #pragma omp target
136 #pragma omp teams
137 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]](
138 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
139
140 #pragma omp distribute parallel for
141 for (int i = 0; i < n; ++i) {
142 a[i] = b[i] + c[i];
143 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]](
144 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
145 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
146 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
147 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
148
149 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
150
151 // check EUB for distribute
152 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
153 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
154 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
155 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
156 // LAMBDA-DAG: [[EUB_TRUE]]:
157 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
158 // LAMBDA: br label %[[EUB_END:.+]]
159 // LAMBDA-DAG: [[EUB_FALSE]]:
160 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
161 // LAMBDA: br label %[[EUB_END]]
162 // LAMBDA-DAG: [[EUB_END]]:
163 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
164 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
165
166 // initialize omp.iv
167 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
168 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
169 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
170
171 // check exit condition
172 // LAMBDA: [[OMP_JUMP_BACK]]:
173 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
174 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
175 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
176 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
177
178 // check that PrevLB and PrevUB are passed to the 'for'
179 // LAMBDA: [[DIST_BODY]]:
180 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
181 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
182 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
183 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
184 // check that distlb and distub are properly passed to fork_call
185 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
186 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
187 // LAMBDA: br label %[[DIST_INC:.+]]
188
189 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
190 // LAMBDA: [[DIST_INC]]:
191 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
192 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
193 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
194 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
195 // LAMBDA: br label %[[OMP_JUMP_BACK]]
196
197 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
198 // LAMBDA: ret
199
200 // implementation of 'parallel for'
201 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
202
203 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
204 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
205 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
206
207 // initialize lb and ub to PrevLB and PrevUB
208 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
209 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
210 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
211 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
212 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
213 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
214 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
215 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
216 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
217 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
218 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
219
220 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
221 // In this case we use EUB
222 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
223 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
224 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
225 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
226 // LAMBDA: [[PF_EUB_TRUE]]:
227 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
228 // LAMBDA: br label %[[PF_EUB_END:.+]]
229 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
230 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
231 // LAMBDA: br label %[[PF_EUB_END]]
232 // LAMBDA-DAG: [[PF_EUB_END]]:
233 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
234 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
235
236 // initialize omp.iv
237 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
238 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
239 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
240
241 // check exit condition
242 // LAMBDA: [[OMP_PF_JUMP_BACK]]:
243 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
244 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
245 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
246 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
247
248 // check that PrevLB and PrevUB are passed to the 'for'
249 // LAMBDA: [[PF_BODY]]:
250 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
251 // LAMBDA: br label {{.+}}
252
253 // check stride 1 for 'for' in 'distribute parallel for'
254 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
255 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
256 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
257 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
258
259 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
260 // LAMBDA: ret
261
262 [&]() {
263 a[i] = b[i] + c[i];
264 }();
265 }
266
  // dist_schedule: static no chunk (same as default - no dist_schedule)
268 #pragma omp target
269 #pragma omp teams
270 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]](
271 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
272
273 #pragma omp distribute parallel for dist_schedule(static)
274 for (int i = 0; i < n; ++i) {
275 a[i] = b[i] + c[i];
276 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]](
277 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
278 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
279 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
280 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
281
282 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
283
284 // check EUB for distribute
285 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
286 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
287 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
288 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
289 // LAMBDA-DAG: [[EUB_TRUE]]:
290 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
291 // LAMBDA: br label %[[EUB_END:.+]]
292 // LAMBDA-DAG: [[EUB_FALSE]]:
293 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
294 // LAMBDA: br label %[[EUB_END]]
295 // LAMBDA-DAG: [[EUB_END]]:
296 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
297 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
298
299 // initialize omp.iv
300 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
301 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
302 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]]
303
304 // check exit condition
305 // LAMBDA: [[OMP_JUMP_BACK]]:
306 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
307 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
308 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
309 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
310
311 // check that PrevLB and PrevUB are passed to the 'for'
312 // LAMBDA: [[DIST_BODY]]:
313 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
314 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
315 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
316 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
317 // check that distlb and distub are properly passed to fork_call
318 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
319 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
320 // LAMBDA: br label %[[DIST_INC:.+]]
321
322 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
323 // LAMBDA: [[DIST_INC]]:
324 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
325 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
326 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
327 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
328 // LAMBDA: br label %[[OMP_JUMP_BACK]]
329
330 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
331 // LAMBDA: ret
332
333 // implementation of 'parallel for'
334 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
335
336 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
337 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
338 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
339
340 // initialize lb and ub to PrevLB and PrevUB
341 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
342 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
343 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
344 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
345 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
346 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
347 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
348 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
349 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
350 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
351 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
352
353 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
354 // In this case we use EUB
355 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
356 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
357 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
358 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
359 // LAMBDA: [[PF_EUB_TRUE]]:
360 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
361 // LAMBDA: br label %[[PF_EUB_END:.+]]
362 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
363 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
364 // LAMBDA: br label %[[PF_EUB_END]]
365 // LAMBDA-DAG: [[PF_EUB_END]]:
366 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
367 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
368
369 // initialize omp.iv
370 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
371 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
372 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
373
374 // check exit condition
375 // LAMBDA: [[OMP_PF_JUMP_BACK]]:
376 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
377 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
378 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
379 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
380
381 // check that PrevLB and PrevUB are passed to the 'for'
382 // LAMBDA: [[PF_BODY]]:
383 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
384 // LAMBDA: br label {{.+}}
385
386 // check stride 1 for 'for' in 'distribute parallel for'
387 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
388 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
389 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
390 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
391
392 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
393 // LAMBDA: ret
394 [&]() {
395 a[i] = b[i] + c[i];
396 }();
397 }
398
399 // dist_schedule: static chunk
400 #pragma omp target
401 #pragma omp teams
402 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]](
403 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
404
405 #pragma omp distribute parallel for dist_schedule(static, ch)
406 for (int i = 0; i < n; ++i) {
407 a[i] = b[i] + c[i];
408 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
409 // LAMBDA: alloca
410 // LAMBDA: alloca
411 // LAMBDA: alloca
412 // LAMBDA: alloca
413 // LAMBDA: alloca
414 // LAMBDA: alloca
415 // LAMBDA: alloca
416 // LAMBDA: [[OMP_IV:%.+]] = alloca
417 // LAMBDA: alloca
418 // LAMBDA: alloca
419 // LAMBDA: alloca
420 // LAMBDA: alloca
421 // LAMBDA: [[OMP_LB:%.+]] = alloca
422 // LAMBDA: [[OMP_UB:%.+]] = alloca
423 // LAMBDA: [[OMP_ST:%.+]] = alloca
424
425 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
426
427 // check EUB for distribute
428 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
429 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}
430 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
431 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
432 // LAMBDA-DAG: [[EUB_TRUE]]:
433 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
434 // LAMBDA: br label %[[EUB_END:.+]]
435 // LAMBDA-DAG: [[EUB_FALSE]]:
436 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
437 // LAMBDA: br label %[[EUB_END]]
438 // LAMBDA-DAG: [[EUB_END]]:
439 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
440 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
441
442 // initialize omp.iv
443 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
444 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
445
446 // check exit condition
447 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
448 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
449 // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
450 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
451 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
452
453 // check that PrevLB and PrevUB are passed to the 'for'
454 // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
455 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
456 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
457 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
458 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
459 // check that distlb and distub are properly passed to fork_call
460 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
461 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
462 // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]]
463
464 // check DistInc
465 // LAMBDA: [[DIST_INNER_LOOP_INC]]:
466 // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
467 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
468 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
469 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
470 // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
471 // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
472 // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
473 // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
474 // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
475 // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
476 // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
477 // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
478
479 // Update UB
480 // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
481 // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
482 // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
483 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
484 // LAMBDA-DAG: [[EUB_TRUE_1]]:
485 // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}}
486 // LAMBDA: br label %[[EUB_END_1:.+]]
487 // LAMBDA-DAG: [[EUB_FALSE_1]]:
488 // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
489 // LAMBDA: br label %[[EUB_END_1]]
490 // LAMBDA-DAG: [[EUB_END_1]]:
491 // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
492 // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
493
494 // Store LB in IV
495 // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
496 // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
497
498 // LAMBDA: [[DIST_INNER_LOOP_END]]:
499 // LAMBDA: br label %[[LOOP_EXIT:.+]]
500
501 // loop exit
502 // LAMBDA: [[LOOP_EXIT]]:
503 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
504 // LAMBDA: ret
505
506 // skip implementation of 'parallel for': using default scheduling and was tested above
507 [&]() {
508 a[i] = b[i] + c[i];
509 }();
510 }
511
512 // schedule: static no chunk
513 #pragma omp target
514 #pragma omp teams
515 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]](
516 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
517
518 #pragma omp distribute parallel for schedule(static)
519 for (int i = 0; i < n; ++i) {
520 a[i] = b[i] + c[i];
521 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]](
522 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
523 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
524 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
525 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
526
527 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
528 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
529 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
530 // LAMBDA: ret
531
    // 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
533 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
534
535 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
536 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
537 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
538
539 // initialize lb and ub to PrevLB and PrevUB
540 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
541 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
542 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
543 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
544 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
545 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
546 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
547 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
548 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
549 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
550 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
551
552 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
553 // In this case we use EUB
554 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
555 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
556 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
557 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
558 // LAMBDA: [[PF_EUB_TRUE]]:
559 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
560 // LAMBDA: br label %[[PF_EUB_END:.+]]
561 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
562 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
563 // LAMBDA: br label %[[PF_EUB_END]]
564 // LAMBDA-DAG: [[PF_EUB_END]]:
565 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
566 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
567
568 // initialize omp.iv
569 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
570 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
571 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]
572
573 // check exit condition
574 // LAMBDA: [[OMP_PF_JUMP_BACK]]:
575 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
576 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
577 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
578 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
579
580 // check that PrevLB and PrevUB are passed to the 'for'
581 // LAMBDA: [[PF_BODY]]:
582 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
583 // LAMBDA: br label {{.+}}
584
585 // check stride 1 for 'for' in 'distribute parallel for'
586 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
587 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
588 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
589 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]]
590
591 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
592 // LAMBDA: ret
593
594 [&]() {
595 a[i] = b[i] + c[i];
596 }();
597 }
598
599 // schedule: static chunk
600 #pragma omp target
601 #pragma omp teams
602 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]](
603 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
604
605 #pragma omp distribute parallel for schedule(static, ch)
606 for (int i = 0; i < n; ++i) {
607 a[i] = b[i] + c[i];
608 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]](
609 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
610 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
611 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
612 // LAMBDA: ret
613
614 // 'parallel for' implementation using outer and inner loops and PrevEUB
615 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
616 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
617 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
618 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
619 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
620
621 // initialize lb and ub to PrevLB and PrevUB
622 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
623 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
624 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
625 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
626 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
627 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
628 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
629 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
630 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
631 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
632 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
633 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
634
635 // check PrevEUB (using PrevUB instead of NumIt as upper bound)
636 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
637 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
638 // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
639 // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
640 // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
641 // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
642 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
643 // LAMBDA: [[PF_EUB_TRUE]]:
644 // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
645 // LAMBDA: br label %[[PF_EUB_END:.+]]
646 // LAMBDA-DAG: [[PF_EUB_FALSE]]:
647 // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
648 // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
649 // LAMBDA: br label %[[PF_EUB_END]]
650 // LAMBDA-DAG: [[PF_EUB_END]]:
651 // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
652 // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
653 // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
654 // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
655 // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
656
657 // initialize omp.iv (IV = LB)
658 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
659 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
660
661 // outer loop: while (IV < UB) {
662 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
663 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
664 // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
665 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
666
667 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
668 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
669
670 // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]:
671 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
672 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
673 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
674 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
675
676 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
677 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
678 // skip body branch
679 // LAMBDA: br{{.+}}
680 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
681
682 // IV = IV + 1 and inner loop latch
683 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
684 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
685 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
686 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
687 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]]
688
689 // check NextLB and NextUB
690 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
691 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
692
693 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
694 // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
695 // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
696 // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
697 // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
698 // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
699 // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
700 // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
701 // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
702 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
703
704 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
705 // LAMBDA-DAG: call void @__kmpc_for_static_fini(
706 // LAMBDA: ret
707 [&]() {
708 a[i] = b[i] + c[i];
709 }();
710 }
711
712 // schedule: dynamic no chunk
713 #pragma omp target
714 #pragma omp teams
715 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]](
716 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
717
718 #pragma omp distribute parallel for schedule(dynamic)
719 for (int i = 0; i < n; ++i) {
720 a[i] = b[i] + c[i];
721 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]](
722 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
723 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
724 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
725 // LAMBDA: ret
726
// 'parallel for' implementation using outer and inner loops driven by __kmpc_dispatch_next (dynamic schedule: no PrevEUB computation)
728 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
729 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
730 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
731 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
732 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
733
734 // initialize lb and ub to PrevLB and PrevUB
735 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
736 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
737 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
738 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
739 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
740 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
741 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
742 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
743 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
744 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
745 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
746 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
747 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
748 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
749
750 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
751 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
752 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
753 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
754
755 // initialize omp.iv (IV = LB)
756 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
757 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
758 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
759 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
760
761 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
762 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
763 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
764 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
765 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
766
767 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
768 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
769 // skip body branch
770 // LAMBDA: br{{.+}}
771 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
772
773 // IV = IV + 1 and inner loop latch
774 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
775 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
776 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
777 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
778 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
779
// inner loop end; the next chunk's LB/UB come from __kmpc_dispatch_next, so the outer-loop increment block only branches back
781 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
782 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
783
784 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
785 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
786
787 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
788 // LAMBDA: ret
789 [&]() {
790 a[i] = b[i] + c[i];
791 }();
792 }
793
794 // schedule: dynamic chunk
795 #pragma omp target
796 #pragma omp teams
797 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]](
798 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
799
800 #pragma omp distribute parallel for schedule(dynamic, ch)
801 for (int i = 0; i < n; ++i) {
802 a[i] = b[i] + c[i];
803 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]](
804 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
805 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
806 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
807 // LAMBDA: ret
808
// 'parallel for' implementation using outer and inner loops driven by __kmpc_dispatch_next (dynamic schedule: no PrevEUB computation)
810 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
811 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
812 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
813 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
814 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
815
816 // initialize lb and ub to PrevLB and PrevUB
817 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
818 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
819 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
820 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
821 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
822 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
823 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
824 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
825 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
826 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
827 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
828 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
829 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
830 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
831
832 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]:
833 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
834 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
835 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
836
837 // initialize omp.iv (IV = LB)
838 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]:
839 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
840 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
841 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
842
843 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]:
844 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
845 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
846 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
847 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
848
849 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]:
850 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
851 // skip body branch
852 // LAMBDA: br{{.+}}
853 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
854
855 // IV = IV + 1 and inner loop latch
856 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]:
857 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
858 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
859 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
860 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]]
861
// inner loop end; the next chunk's LB/UB come from __kmpc_dispatch_next, so the outer-loop increment block only branches back
863 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]:
864 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
865
866 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]:
867 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
868
869 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]:
870 // LAMBDA: ret
871 [&]() {
872 a[i] = b[i] + c[i];
873 }();
874 }
875 }();
876 return 0;
877 #else
878 // CHECK-LABEL: @main
879
880 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
881 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
882
883 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
884 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
885
886 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
887 // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
888
889 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
890 // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
891
892 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
893 // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
894
895 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
896 // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
897
898 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
899 // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
900
901 // CHECK: call{{.+}} [[TMAIN:@.+]]()
902
903 // no schedule clauses
904 #pragma omp target
905 #pragma omp teams
906 // CHECK: define internal void [[OFFLOADING_FUN_1]](
907 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
908
909 #pragma omp distribute parallel for
910 for (int i = 0; i < n; ++i) {
911 a[i] = b[i] + c[i];
912 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
913 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
914 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
915 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
916 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
917
918 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
919
920 // check EUB for distribute
921 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
922 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
923 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
924 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
925 // CHECK-DAG: [[EUB_TRUE]]:
926 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
927 // CHECK: br label %[[EUB_END:.+]]
928 // CHECK-DAG: [[EUB_FALSE]]:
929 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
930 // CHECK: br label %[[EUB_END]]
931 // CHECK-DAG: [[EUB_END]]:
932 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
933 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
934
935 // initialize omp.iv
936 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
937 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
938 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
939
940 // check exit condition
941 // CHECK: [[OMP_JUMP_BACK]]:
942 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
943 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
944 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
945 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
946
947 // check that PrevLB and PrevUB are passed to the 'for'
948 // CHECK: [[DIST_BODY]]:
949 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
950 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
951 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
952 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
953 // check that distlb and distub are properly passed to fork_call
954 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
955 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
956 // CHECK: br label %[[DIST_INC:.+]]
957
958 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
959 // CHECK: [[DIST_INC]]:
960 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
961 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
962 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
963 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
964 // CHECK: br label %[[OMP_JUMP_BACK]]
965
966 // CHECK-DAG: call void @__kmpc_for_static_fini(
967 // CHECK: ret
968
969 // implementation of 'parallel for'
970 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
971
972 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
973 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
974 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
975
976 // initialize lb and ub to PrevLB and PrevUB
977 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
978 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
979 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
980 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
981 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
982 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
983 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
984 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
985 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
986 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
987 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
988
989 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
990 // In this case we use EUB
991 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
992 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
993 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
994 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
995 // CHECK: [[PF_EUB_TRUE]]:
996 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
997 // CHECK: br label %[[PF_EUB_END:.+]]
998 // CHECK-DAG: [[PF_EUB_FALSE]]:
999 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1000 // CHECK: br label %[[PF_EUB_END]]
1001 // CHECK-DAG: [[PF_EUB_END]]:
1002 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1003 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1004
1005 // initialize omp.iv
1006 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1007 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1008 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1009
1010 // check exit condition
1011 // CHECK: [[OMP_PF_JUMP_BACK]]:
1012 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1013 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1014 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1015 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1016
// check the loop body (the inner induction variable OMP_PF_IV is loaded)
1018 // CHECK: [[PF_BODY]]:
1019 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1020 // CHECK: br label {{.+}}
1021
1022 // check stride 1 for 'for' in 'distribute parallel for'
1023 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1024 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1025 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1026 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1027
1028 // CHECK-DAG: call void @__kmpc_for_static_fini(
1029 // CHECK: ret
1030 }
1031
1032 // dist_schedule: static no chunk
1033 #pragma omp target
1034 #pragma omp teams
1035 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
1036 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
1037
1038 #pragma omp distribute parallel for dist_schedule(static)
1039 for (int i = 0; i < n; ++i) {
1040 a[i] = b[i] + c[i];
1041 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
1042 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1043 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1044 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1045 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1046
1047 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1048
1049 // check EUB for distribute
1050 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1051 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1052 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1053 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1054 // CHECK-DAG: [[EUB_TRUE]]:
1055 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1056 // CHECK: br label %[[EUB_END:.+]]
1057 // CHECK-DAG: [[EUB_FALSE]]:
1058 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1059 // CHECK: br label %[[EUB_END]]
1060 // CHECK-DAG: [[EUB_END]]:
1061 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1062 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1063
1064 // initialize omp.iv
1065 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1066 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1067 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1068
1069 // check exit condition
1070 // CHECK: [[OMP_JUMP_BACK]]:
1071 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1072 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1073 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1074 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1075
1076 // check that PrevLB and PrevUB are passed to the 'for'
1077 // CHECK: [[DIST_BODY]]:
1078 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1079 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1080 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1081 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1082 // check that distlb and distub are properly passed to fork_call
1083 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1084 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1085 // CHECK: br label %[[DIST_INC:.+]]
1086
1087 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1088 // CHECK: [[DIST_INC]]:
1089 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1090 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1091 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1092 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1093 // CHECK: br label %[[OMP_JUMP_BACK]]
1094
1095 // CHECK-DAG: call void @__kmpc_for_static_fini(
1096 // CHECK: ret
1097
1098 // implementation of 'parallel for'
1099 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1100
1101 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1102 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1103 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1104
1105 // initialize lb and ub to PrevLB and PrevUB
1106 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1107 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1108 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1109 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1110 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1111 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1112 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1113 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1114 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1115 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1116 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1117
1118 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1119 // In this case we use EUB
1120 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1121 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1122 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1123 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1124 // CHECK: [[PF_EUB_TRUE]]:
1125 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1126 // CHECK: br label %[[PF_EUB_END:.+]]
1127 // CHECK-DAG: [[PF_EUB_FALSE]]:
1128 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1129 // CHECK: br label %[[PF_EUB_END]]
1130 // CHECK-DAG: [[PF_EUB_END]]:
1131 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1132 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1133
1134 // initialize omp.iv
1135 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1136 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1137 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1138
1139 // check exit condition
1140 // CHECK: [[OMP_PF_JUMP_BACK]]:
1141 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1142 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1143 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1144 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1145
// check the loop body (the inner induction variable OMP_PF_IV is loaded)
1147 // CHECK: [[PF_BODY]]:
1148 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1149 // CHECK: br label {{.+}}
1150
1151 // check stride 1 for 'for' in 'distribute parallel for'
1152 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1153 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1154 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1155 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1156
1157 // CHECK-DAG: call void @__kmpc_for_static_fini(
1158 // CHECK: ret
1159 }
1160
1161 // dist_schedule: static chunk
1162 #pragma omp target
1163 #pragma omp teams
1164 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
1165 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
1166
1167 #pragma omp distribute parallel for dist_schedule(static, ch)
1168 for (int i = 0; i < n; ++i) {
1169 a[i] = b[i] + c[i];
1170 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
1171 // CHECK: alloca
1172 // CHECK: alloca
1173 // CHECK: alloca
1174 // CHECK: alloca
1175 // CHECK: alloca
1176 // CHECK: alloca
1177 // CHECK: alloca
1178 // CHECK: [[OMP_IV:%.+]] = alloca
1179 // CHECK: alloca
1180 // CHECK: alloca
1181 // CHECK: alloca
1182 // CHECK: alloca
1183 // CHECK: [[OMP_LB:%.+]] = alloca
1184 // CHECK: [[OMP_UB:%.+]] = alloca
1185 // CHECK: [[OMP_ST:%.+]] = alloca
1186
1187 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
1188 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
1189
1190 // check EUB for distribute
1191 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1192 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
1193 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1194 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1195 // CHECK-DAG: [[EUB_TRUE]]:
1196 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1197 // CHECK: br label %[[EUB_END:.+]]
1198 // CHECK-DAG: [[EUB_FALSE]]:
1199 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1200 // CHECK: br label %[[EUB_END]]
1201 // CHECK-DAG: [[EUB_END]]:
1202 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1203 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1204
1205 // initialize omp.iv
1206 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1207 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1208
1209 // check exit condition
1210 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1211 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
1212 // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
1213 // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
1214 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
1215
1216 // check that PrevLB and PrevUB are passed to the 'for'
1217 // CHECK: [[DIST_INNER_LOOP_BODY]]:
1218 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1219 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1220 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1221 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1222 // check that distlb and distub are properly passed to fork_call
1223 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1224 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1225 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
1226
1227 // check DistInc
1228 // CHECK: [[DIST_INNER_LOOP_INC]]:
1229 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1230 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1231 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
1232 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1233 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1234 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1235 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
1236 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
1237 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1238 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1239 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
1240 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
1241
1242 // Update UB
1243 // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1244 // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
1245 // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
1246 // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
1247 // CHECK-DAG: [[EUB_TRUE_1]]:
1248 // CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
1249 // CHECK: br label %[[EUB_END_1:.+]]
1250 // CHECK-DAG: [[EUB_FALSE_1]]:
1251 // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
1252 // CHECK: br label %[[EUB_END_1]]
1253 // CHECK-DAG: [[EUB_END_1]]:
1254 // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
1255 // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
1256
1257 // Store LB in IV
1258 // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1259 // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
1260
1261 // CHECK: [[DIST_INNER_LOOP_END]]:
1262 // CHECK: br label %[[LOOP_EXIT:.+]]
1263
1264 // loop exit
1265 // CHECK: [[LOOP_EXIT]]:
1266 // CHECK-DAG: call void @__kmpc_for_static_fini(
1267 // CHECK: ret
1268
1269 // skip implementation of 'parallel for': using default scheduling and was tested above
1270 }
1271
1272 // schedule: static no chunk
1273 #pragma omp target
1274 #pragma omp teams
1275 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
1276 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
1277
1278 #pragma omp distribute parallel for schedule(static)
1279 for (int i = 0; i < n; ++i) {
1280 a[i] = b[i] + c[i];
1281 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
1282 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1283 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1284 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1285 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1286
1287 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1288 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
1289 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1290 // CHECK: ret
1291
// 'parallel for' implementation is the same as the case without a schedule clause (static no chunk is the default)
1293 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1294
1295 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1296 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1297 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1298
1299 // initialize lb and ub to PrevLB and PrevUB
1300 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1301 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1302 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1303 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1304 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1305 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1306 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1307 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1308 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1309 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1310 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1311
1312 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1313 // In this case we use EUB
1314 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1315 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1316 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1317 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1318 // CHECK: [[PF_EUB_TRUE]]:
1319 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1320 // CHECK: br label %[[PF_EUB_END:.+]]
1321 // CHECK-DAG: [[PF_EUB_FALSE]]:
1322 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1323 // CHECK: br label %[[PF_EUB_END]]
1324 // CHECK-DAG: [[PF_EUB_END]]:
1325 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1326 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1327
1328 // initialize omp.iv
1329 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1330 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1331 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1332
1333 // check exit condition
1334 // CHECK: [[OMP_PF_JUMP_BACK]]:
1335 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1336 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1337 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1338 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1339
// check the body of the 'for' (only the IV is read here)
1341 // CHECK: [[PF_BODY]]:
1342 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1343 // CHECK: br label {{.+}}
1344
1345 // check stride 1 for 'for' in 'distribute parallel for'
1346 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1347 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1348 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1349 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1350
1351 // CHECK-DAG: call void @__kmpc_for_static_fini(
1352 // CHECK: ret
1353 }
1354
1355 // schedule: static chunk
1356 #pragma omp target
1357 #pragma omp teams
1358 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
1359 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
1360
1361 #pragma omp distribute parallel for schedule(static, ch)
1362 for (int i = 0; i < n; ++i) {
1363 a[i] = b[i] + c[i];
1364 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
1365 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1366 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
1367 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1368 // CHECK: ret
1369
1370 // 'parallel for' implementation using outer and inner loops and PrevEUB
1371 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
1372 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1373 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1374 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1375 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1376
1377 // initialize lb and ub to PrevLB and PrevUB
1378 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1379 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1380 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1381 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1382 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1383 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1384 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1385 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1386 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1387 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1388 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1389 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
1390
1391 // check PrevEUB (using PrevUB instead of NumIt as upper bound)
1392 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1393 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1394 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
1395 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1396 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
1397 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
1398 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1399 // CHECK: [[PF_EUB_TRUE]]:
1400 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1401 // CHECK: br label %[[PF_EUB_END:.+]]
1402 // CHECK-DAG: [[PF_EUB_FALSE]]:
1403 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1404 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
1405 // CHECK: br label %[[PF_EUB_END]]
1406 // CHECK-DAG: [[PF_EUB_END]]:
1407 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
1408 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
1409 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
1410 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
1411 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
1412
1413 // initialize omp.iv (IV = LB)
1414 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1415 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1416
1417 // outer loop: while (IV < UB) {
1418 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1419 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1420 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1421 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1422
1423 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1424 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
1425
1426 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
1427 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1428 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1429 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1430 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1431
1432 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1433 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1434 // skip body branch
1435 // CHECK: br{{.+}}
1436 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1437
1438 // IV = IV + 1 and inner loop latch
1439 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1440 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1441 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1442 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1443 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
1444
1445 // check NextLB and NextUB
1446 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1447 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1448
1449 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1450 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1451 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
1452 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
1453 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
1454 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1455 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
1456 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
1457 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
1458 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1459
1460 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1461 // CHECK-DAG: call void @__kmpc_for_static_fini(
1462 // CHECK: ret
1463 }
1464
1465 // schedule: dynamic no chunk
1466 #pragma omp target
1467 #pragma omp teams
1468 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
1469 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
1470
1471 #pragma omp distribute parallel for schedule(dynamic)
1472 for (int i = 0; i < n; ++i) {
1473 a[i] = b[i] + c[i];
1474 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
1475 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1476 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
1477 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1478 // CHECK: ret
1479
1480 // 'parallel for' implementation using outer and inner loops and PrevEUB
1481 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1482 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1483 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1484 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1485 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1486
1487 // initialize lb and ub to PrevLB and PrevUB
1488 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1489 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1490 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1491 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1492 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1493 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1494 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1495 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1496 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1497 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1498 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1499 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1500 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
1501 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
1502
1503 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1504 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
1505 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
1506 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1507
1508 // initialize omp.iv (IV = LB)
1509 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1510 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1511 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1512 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
1513
1514 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
1515 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1516 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1517 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1518 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1519
1520 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1521 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1522 // skip body branch
1523 // CHECK: br{{.+}}
1524 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1525
1526 // IV = IV + 1 and inner loop latch
1527 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1528 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1529 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1530 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1531 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
1532
1533 // check NextLB and NextUB
1534 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1535 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1536
1537 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1538 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1539
1540 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1541 // CHECK: ret
1542 }
1543
1544 // schedule: dynamic chunk
1545 #pragma omp target
1546 #pragma omp teams
1547 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
1548 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
1549
1550 #pragma omp distribute parallel for schedule(dynamic, ch)
1551 for (int i = 0; i < n; ++i) {
1552 a[i] = b[i] + c[i];
1553 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
1554 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1555 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
1556 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
1557 // CHECK: ret
1558
1559 // 'parallel for' implementation using outer and inner loops and PrevEUB
1560 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
1561 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1562 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1563 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1564 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
1565
1566 // initialize lb and ub to PrevLB and PrevUB
1567 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1568 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1569 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1570 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1571 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1572 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1573 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1574 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1575 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1576 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1577 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1578 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
1579 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
1580 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
1581
1582 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
1583 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
1584 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
1585 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
1586
1587 // initialize omp.iv (IV = LB)
1588 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
1589 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1590 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1591 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
1592
1593 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
1594 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1595 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
1596 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
1597 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
1598
1599 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
1600 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1601 // skip body branch
1602 // CHECK: br{{.+}}
1603 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
1604
1605 // IV = IV + 1 and inner loop latch
1606 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
1607 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
1608 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
1609 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
1610 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
1611
1612 // check NextLB and NextUB
1613 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
1614 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
1615
1616 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
1617 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
1618
1619 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
1620 // CHECK: ret
1621 }
1622
1623 return tmain<int>();
1624 #endif
1625 }
1626
1627 // check code
1628 // CHECK: define{{.+}} [[TMAIN]]()
1629
1630 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1631 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
1632
1633 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1634 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
1635
1636 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1637 // CHECK: call void [[OFFLOADING_FUN_3:@.+]](
1638
1639 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1640 // CHECK: call void [[OFFLOADING_FUN_4:@.+]](
1641
1642 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1643 // CHECK: call void [[OFFLOADING_FUN_5:@.+]](
1644
1645 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1646 // CHECK: call void [[OFFLOADING_FUN_6:@.+]](
1647
1648 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
1649 // CHECK: call void [[OFFLOADING_FUN_7:@.+]](
1650
1651 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]](
1652 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
1653
1654 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
1655 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1656 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1657 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1658 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1659
1660 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1661
1662 // check EUB for distribute
1663 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1664 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1665 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1666 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1667 // CHECK-DAG: [[EUB_TRUE]]:
1668 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1669 // CHECK: br label %[[EUB_END:.+]]
1670 // CHECK-DAG: [[EUB_FALSE]]:
1671 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1672 // CHECK: br label %[[EUB_END]]
1673 // CHECK-DAG: [[EUB_END]]:
1674 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1675 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1676
1677 // initialize omp.iv
1678 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1679 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1680 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1681
1682 // check exit condition
1683 // CHECK: [[OMP_JUMP_BACK]]:
1684 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1685 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1686 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1687 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1688
1689 // check that PrevLB and PrevUB are passed to the 'for'
1690 // CHECK: [[DIST_BODY]]:
1691 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1692 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1693 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1694 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1695 // check that distlb and distub are properly passed to fork_call
1696 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1697 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1698 // CHECK: br label %[[DIST_INC:.+]]
1699
1700 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1701 // CHECK: [[DIST_INC]]:
1702 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1703 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1704 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1705 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1706 // CHECK: br label %[[OMP_JUMP_BACK]]
1707
1708 // CHECK-DAG: call void @__kmpc_for_static_fini(
1709 // CHECK: ret
1710
1711 // implementation of 'parallel for'
1712 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1713
1714 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1715 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1716 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1717
1718 // initialize lb and ub to PrevLB and PrevUB
1719 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1720 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1721 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1722 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1723 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1724 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1725 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1726 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1727 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1728 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1729 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1730
1731 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1732 // In this case we use EUB
1733 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1734 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1735 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1736 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1737 // CHECK: [[PF_EUB_TRUE]]:
1738 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1739 // CHECK: br label %[[PF_EUB_END:.+]]
1740 // CHECK-DAG: [[PF_EUB_FALSE]]:
1741 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1742 // CHECK: br label %[[PF_EUB_END]]
1743 // CHECK-DAG: [[PF_EUB_END]]:
1744 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1745 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1746
1747 // initialize omp.iv
1748 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1749 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1750 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1751
1752 // check exit condition
1753 // CHECK: [[OMP_PF_JUMP_BACK]]:
1754 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1755 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1756 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1757 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1758
// loop body of the 'for': the iteration variable is loaded for use in the body
1760 // CHECK: [[PF_BODY]]:
1761 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1762 // CHECK: br label {{.+}}
1763
1764 // check stride 1 for 'for' in 'distribute parallel for'
1765 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1766 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1767 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1768 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1769
1770 // CHECK-DAG: call void @__kmpc_for_static_fini(
1771 // CHECK: ret
1772
1773 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]](
1774 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})
1775
1776 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
1777 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
1778 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
1779 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
1780 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
1781
1782 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
1783
1784 // check EUB for distribute
1785 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1786 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
1787 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1788 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1789 // CHECK-DAG: [[EUB_TRUE]]:
1790 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1791 // CHECK: br label %[[EUB_END:.+]]
1792 // CHECK-DAG: [[EUB_FALSE]]:
1793 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1794 // CHECK: br label %[[EUB_END]]
1795 // CHECK-DAG: [[EUB_END]]:
1796 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1797 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1798
1799 // initialize omp.iv
1800 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1801 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1802 // CHECK: br label %[[OMP_JUMP_BACK:.+]]
1803
1804 // check exit condition
1805 // CHECK: [[OMP_JUMP_BACK]]:
1806 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1807 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
1808 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
1809 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]
1810
1811 // check that PrevLB and PrevUB are passed to the 'for'
1812 // CHECK: [[DIST_BODY]]:
1813 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1814 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1815 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1816 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1817 // check that distlb and distub are properly passed to fork_call
1818 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1819 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1820 // CHECK: br label %[[DIST_INC:.+]]
1821
1822 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch
1823 // CHECK: [[DIST_INC]]:
1824 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1825 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1826 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]]
1827 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1828 // CHECK: br label %[[OMP_JUMP_BACK]]
1829
1830 // CHECK-DAG: call void @__kmpc_for_static_fini(
1831 // CHECK: ret
1832
1833 // implementation of 'parallel for'
1834 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
1835
1836 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
1837 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
1838 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
1839
1840 // initialize lb and ub to PrevLB and PrevUB
1841 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
1842 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
1843 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
1844 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
1845 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
1846 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
1847 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
1848 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
1849 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
1850 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
1851 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
1852
1853 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
1854 // In this case we use EUB
1855 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
1856 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
1857 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
1858 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
1859 // CHECK: [[PF_EUB_TRUE]]:
1860 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
1861 // CHECK: br label %[[PF_EUB_END:.+]]
1862 // CHECK-DAG: [[PF_EUB_FALSE]]:
1863 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
1864 // CHECK: br label %[[PF_EUB_END]]
1865 // CHECK-DAG: [[PF_EUB_END]]:
1866 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
1867 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
1868
1869 // initialize omp.iv
1870 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
1871 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
1872 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
1873
1874 // check exit condition
1875 // CHECK: [[OMP_PF_JUMP_BACK]]:
1876 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
1877 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
1878 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
1879 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
1880
// loop body of the 'for': the iteration variable is loaded for use in the body
1882 // CHECK: [[PF_BODY]]:
1883 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
1884 // CHECK: br label {{.+}}
1885
1886 // check stride 1 for 'for' in 'distribute parallel for'
1887 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
1888 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
1889 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
1890 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
1891
1892 // CHECK-DAG: call void @__kmpc_for_static_fini(
1893 // CHECK: ret
1894
1895 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
1896 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
1897
1898 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
1899 // CHECK: alloca
1900 // CHECK: alloca
1901 // CHECK: alloca
1902 // CHECK: alloca
1903 // CHECK: alloca
1904 // CHECK: alloca
1905 // CHECK: alloca
1906 // CHECK: [[OMP_IV:%.+]] = alloca
1907 // CHECK: alloca
1908 // CHECK: alloca
1909 // CHECK: alloca
1910 // CHECK: alloca
1911 // CHECK: [[OMP_LB:%.+]] = alloca
1912 // CHECK: [[OMP_UB:%.+]] = alloca
1913 // CHECK: [[OMP_ST:%.+]] = alloca
1914
// unlike the previous tests, in this one we have an outer and inner loop for 'distribute'
1916 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
1917
1918 // check EUB for distribute
1919 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
1920 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
1921 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
1922 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
1923 // CHECK-DAG: [[EUB_TRUE]]:
1924 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
1925 // CHECK: br label %[[EUB_END:.+]]
1926 // CHECK-DAG: [[EUB_FALSE]]:
1927 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
1928 // CHECK: br label %[[EUB_END]]
1929 // CHECK-DAG: [[EUB_END]]:
1930 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
1931 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
1932
1933 // initialize omp.iv
1934 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
1935 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
1936
1937 // check exit condition
1938 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
1939 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
1940 // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
1941 // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
1942 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
1943
1944 // check that PrevLB and PrevUB are passed to the 'for'
1945 // CHECK: [[DIST_INNER_LOOP_BODY]]:
1946 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
1947 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
1948 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
1949 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
1950 // check that distlb and distub are properly passed to fork_call
1951 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
1952 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}})
1953 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]]
1954
1955 // check DistInc
1956 // CHECK: [[DIST_INNER_LOOP_INC]]:
1957 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
1958 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
1959 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
1960 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
1961 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1962 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1963 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
1964 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
1965 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1966 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
1967 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
1968 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
1969
1970 // Update UB
1971 // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
1972 // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
1973 // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
1974 // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
1975 // CHECK-DAG: [[EUB_TRUE_1]]:
1976 // CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
1977 // CHECK: br label %[[EUB_END_1:.+]]
1978 // CHECK-DAG: [[EUB_FALSE_1]]:
1979 // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
1980 // CHECK: br label %[[EUB_END_1]]
1981 // CHECK-DAG: [[EUB_END_1]]:
1982 // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
1983 // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
1984
1985 // Store LB in IV
1986 // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
1987 // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
1988
1989 // CHECK: [[DIST_INNER_LOOP_END]]:
1990 // CHECK: br label %[[LOOP_EXIT:.+]]
1991
1992 // loop exit
1993 // CHECK: [[LOOP_EXIT]]:
1994 // CHECK-DAG: call void @__kmpc_for_static_fini(
1995 // CHECK: ret
1996
1997 // skip implementation of 'parallel for': using default scheduling and was tested above
1998
1999 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
2000 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})
2001
2002 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
2003 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
2004 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
2005 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
2006 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
2007
2008 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2009 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
2010 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2011 // CHECK: ret
2012
// 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default)
2014 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
2015
2016 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2017 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2018 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2019
2020 // initialize lb and ub to PrevLB and PrevUB
2021 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2022 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2023 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2024 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2025 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2026 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2027 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2028 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2029 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2030 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2031 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
2032
2033 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
2034 // In this case we use EUB
2035 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
2036 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
2037 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
2038 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
2039 // CHECK: [[PF_EUB_TRUE]]:
2040 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
2041 // CHECK: br label %[[PF_EUB_END:.+]]
2042 // CHECK-DAG: [[PF_EUB_FALSE]]:
2043 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
2044 // CHECK: br label %[[PF_EUB_END]]
2045 // CHECK-DAG: [[PF_EUB_END]]:
2046 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
2047 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],
2048
2049 // initialize omp.iv
2050 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2051 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2052 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]
2053
2054 // check exit condition
2055 // CHECK: [[OMP_PF_JUMP_BACK]]:
2056 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
2057 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
2058 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
2059 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]
2060
// loop body of the 'for': the iteration variable is loaded for use in the body
2062 // CHECK: [[PF_BODY]]:
2063 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2064 // CHECK: br label {{.+}}
2065
2066 // check stride 1 for 'for' in 'distribute parallel for'
2067 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
2068 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
2069 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
2070 // CHECK: br label %[[OMP_PF_JUMP_BACK]]
2071
2072 // CHECK-DAG: call void @__kmpc_for_static_fini(
2073 // CHECK: ret
2074
2075 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]](
2076 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}})
2077
2078 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]](
2079 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2080 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}},
2081 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2082 // CHECK: ret
2083
2084 // 'parallel for' implementation using outer and inner loops and PrevEUB
2085 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
2086 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2087 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2088 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2089 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2090
2091 // initialize lb and ub to PrevLB and PrevUB
2092 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2093 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2094 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2095 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2096 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2097 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2098 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2099 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2100 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2101 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2102 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})
2103 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2104
2105 // check PrevEUB (using PrevUB instead of NumIt as upper bound)
2106 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2107 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
2108 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to
2109 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2110 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]]
2111 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]]
2112 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
2113 // CHECK: [[PF_EUB_TRUE]]:
2114 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2115 // CHECK: br label %[[PF_EUB_END:.+]]
2116 // CHECK-DAG: [[PF_EUB_FALSE]]:
2117 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]],
2118 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to
2119 // CHECK: br label %[[PF_EUB_END]]
2120 // CHECK-DAG: [[PF_EUB_END]]:
2121 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ]
2122 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ]
2123 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to
2124 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]],
2125 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]],
2126
2127 // initialize omp.iv (IV = LB)
2128 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2129 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2130
2131 // outer loop: while (IV < UB) {
2132 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2133 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2134 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
2135 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2136
2137 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2138 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]]
2139
2140 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]:
2141 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2142 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2143 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2144 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2145
2146 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2147 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2148 // skip body branch
2149 // CHECK: br{{.+}}
2150 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2151
2152 // IV = IV + 1 and inner loop latch
2153 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2154 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
2155 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2156 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
2157 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]]
2158
2159 // check NextLB and NextUB
2160 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2161 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2162
2163 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2164 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2165 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
2166 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]]
2167 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]],
2168 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2169 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]],
2170 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]]
2171 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]],
2172 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2173
2174 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2175 // CHECK-DAG: call void @__kmpc_for_static_fini(
2176 // CHECK: ret
2177
2178 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]](
2179 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}})
2180
2181 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]](
2182 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2183 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}},
2184 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2185 // CHECK: ret
2186
2187 // 'parallel for' implementation using outer and inner loops and PrevEUB
2188 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})
2189 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2190 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2191 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2192 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2193
2194 // initialize lb and ub to PrevLB and PrevUB
2195 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2196 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2197 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2198 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2199 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2200 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2201 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2202 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2203 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2204 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2205 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2206 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2207 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
2208 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2209
2210 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2211 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
2212 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
2213 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2214
2215 // initialize omp.iv (IV = LB)
2216 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2217 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2218 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2219 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
2220
2221 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
2222 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2223 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2224 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2225 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2226
2227 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2228 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2229 // skip body branch
2230 // CHECK: br{{.+}}
2231 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2232
2233 // IV = IV + 1 and inner loop latch
2234 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2235 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
2236 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2237 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
2238 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
2239
2240 // check NextLB and NextUB
2241 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2242 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2243
2244 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2245 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2246
2247 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2248 // CHECK: ret
2249
2250 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
2251 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})
2252
2253 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
2254 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
2255 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
2256 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
2257 // CHECK: ret
2258
2259 // 'parallel for' implementation using outer and inner loops and PrevEUB
2260 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
2261 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
2262 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
2263 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
2264 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},
2265
2266 // initialize lb and ub to PrevLB and PrevUB
2267 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
2268 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
2269 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
2270 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
2271 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
2272 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
2273 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
2274 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
2275 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
2276 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
2277 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2278 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
2279 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
2280 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]
2281
2282 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
2283 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
2284 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
2285 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]
2286
2287 // initialize omp.iv (IV = LB)
2288 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
2289 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
2290 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
2291 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]
2292
2293 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
2294 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2295 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
2296 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
2297 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]
2298
2299 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
2300 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
2301 // skip body branch
2302 // CHECK: br{{.+}}
2303 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]
2304
2305 // IV = IV + 1 and inner loop latch
2306 // CHECK: [[OMP_PF_INNER_LOOP_INC]]:
2307 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
2308 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
2309 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
2310 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]
2311
2312 // check NextLB and NextUB
2313 // CHECK: [[OMP_PF_INNER_LOOP_END]]:
2314 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]
2315
2316 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
2317 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]
2318
2319 // CHECK: [[OMP_PF_OUTER_LOOP_END]]:
2320 // CHECK: ret
2321 #endif
2322