// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK4

// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK5
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6

// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

// expected-no-diagnostics
#ifndef HEADER
#define HEADER

template <typename T>
T tmain() {
  T t_var = T();
  T vec[] = {1, 2};
#pragma omp target teams distribute parallel for reduction(+: t_var)
  for (int i = 0; i < 2; ++i) {
    t_var += (T) i;
  }
  return T();
}

int main() {
  static int sivar;
#ifdef LAMBDA

  [&]() {
#pragma omp target teams distribute parallel for reduction(+: sivar)
  for (int i = 0; i < 2; ++i) {

    // Skip global and bound tid vars



    // Skip global and bound tid vars, and prev lb and ub vars
    // skip loop vars


    sivar += i;

    [&]() {

      sivar += 4;

    }();
  }
  }();
  return 0;
#else
#pragma omp target teams distribute parallel for reduction(+: sivar)
  for (int i = 0; i < 2; ++i) {
    sivar += i;
  }
  return tmain<int>();
#endif
}




// Skip global and bound tid vars


// Skip global and bound tid vars, and prev lb and ub
// skip loop vars




// Skip global and bound tid vars


// Skip global and bound tid vars, and prev lb and ub vars
// skip loop vars

#endif
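// A rough road map for the autogenerated assertions below (host-side IR for
// the non-LAMBDA variant): main packs sivar into the offload argument arrays
// and calls __tgt_target_teams_mapper; the offload entry forks the teams
// outlined function (the distribute loop), which in turn forks the parallel
// outlined function (the inner worksharing loop); each outlined function
// ends with a __kmpc_reduce_nowait sequence that merges its private copy of
// the reduction variable back into the original.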
// CHECK1-LABEL: define {{[^@]+}}@main
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP1:%.*]] = bitcast i8** [[TMP0]] to i32**
// CHECK1-NEXT:    store i32* @_ZZ4mainE5sivar, i32** [[TMP1]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
// CHECK1-NEXT:    store i32* @_ZZ4mainE5sivar, i32** [[TMP3]], align 8
// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT:    store i8* null, i8** [[TMP4]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 2)
// CHECK1-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
// CHECK1-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK1-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1:       omp_offload.failed:
// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(i32* @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK1:       omp_offload.cont:
// CHECK1-NEXT:    [[CALL:%.*]] = call signext i32 @_Z5tmainIiET_v()
// CHECK1-NEXT:    ret i32 [[CALL]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66
// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[SIVAR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK1-NEXT:    store i32 0, i32* [[SIVAR1]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1:       cond.true:
// CHECK1-NEXT:    br label [[COND_END:%.*]]
// CHECK1:       cond.false:
// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    br label [[COND_END]]
// CHECK1:       cond.end:
// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK1-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1:       omp.inner.for.cond:
// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1:       omp.inner.for.body:
// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]])
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1:       omp.inner.for.inc:
// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK1:       omp.inner.for.end:
// CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1:       omp.loop.exit:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8*
// CHECK1-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
// CHECK1-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK1-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT:    ]
// CHECK1:       .omp.reduction.case1:
// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK1-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK1-NEXT:    store i32 [[ADD3]], i32* [[TMP0]], align 4
// CHECK1-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1:       .omp.reduction.case2:
// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK1-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1:       .omp.reduction.default:
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[SIVAR2:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK1-NEXT:    store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK1-NEXT:    [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
// CHECK1-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK1-NEXT:    [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
// CHECK1-NEXT:    store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT:    store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[SIVAR2]], align 4
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
// CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1:       cond.true:
// CHECK1-NEXT:    br label [[COND_END:%.*]]
// CHECK1:       cond.false:
// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    br label [[COND_END]]
// CHECK1:       cond.end:
// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
// CHECK1-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT:    store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1:       omp.inner.for.cond:
// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK1-NEXT:    br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1:       omp.inner.for.body:
// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[I]], align 4
// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, i32* [[I]], align 4
// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4
// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
// CHECK1-NEXT:    store i32 [[ADD4]], i32* [[SIVAR2]], align 4
// CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1:       omp.body.continue:
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1:       omp.inner.for.inc:
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT:    store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK1:       omp.inner.for.end:
// CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1:       omp.loop.exit:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP15:%.*]] = bitcast i32* [[SIVAR2]] to i8*
// CHECK1-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
// CHECK1-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK1-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT:    ]
// CHECK1:       .omp.reduction.case1:
// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, i32* [[SIVAR2]], align 4
// CHECK1-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK1-NEXT:    store i32 [[ADD6]], i32* [[TMP0]], align 4
// CHECK1-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1:       .omp.reduction.case2:
// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[SIVAR2]], align 4
// CHECK1-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1:       .omp.reduction.default:
// CHECK1-NEXT:    ret void
//
//
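// The two reduction functions below are the combiners handed to
// __kmpc_reduce_nowait: each unpacks its two red-list arguments and performs,
// in effect, *lhs += *rhs for the single i32 reduction item.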
// CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
// CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[T_VAR:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[VEC:%.*]] = alloca [2 x i32], align 4
// CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    store i32 0, i32* [[T_VAR]], align 4
// CHECK1-NEXT:    [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false)
// CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[TMP1]] to i32**
// CHECK1-NEXT:    store i32* [[T_VAR]], i32** [[TMP2]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK1-NEXT:    store i32* [[T_VAR]], i32** [[TMP4]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK1-NEXT:    store i8* null, i8** [[TMP5]], align 8
// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i64 2)
// CHECK1-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.7, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
// CHECK1-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK1-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK1:       omp_offload.failed:
// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32* [[T_VAR]]) #[[ATTR2]]
// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK1:       omp_offload.cont:
// CHECK1-NEXT:    ret i32 0
//
//
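// The tmain<int> checks below follow the same shape as the ones for main
// above, with t_var in place of sivar (plus the memcpy that initializes vec
// from its constant initializer).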
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32
// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8
// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[T_VAR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8
// CHECK1-NEXT:    store i32 0, i32* [[T_VAR1]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1:       cond.true:
// CHECK1-NEXT:    br label [[COND_END:%.*]]
// CHECK1:       cond.false:
// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    br label [[COND_END]]
// CHECK1:       cond.end:
// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK1-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1:       omp.inner.for.cond:
// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1:       omp.inner.for.body:
// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[T_VAR1]])
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1:       omp.inner.for.inc:
// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK1:       omp.inner.for.end:
// CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1:       omp.loop.exit:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8*
// CHECK1-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
// CHECK1-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK1-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT:    ]
// CHECK1:       .omp.reduction.case1:
// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK1-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK1-NEXT:    store i32 [[ADD3]], i32* [[TMP0]], align 4
// CHECK1-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1:       .omp.reduction.case2:
// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK1-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1:       .omp.reduction.default:
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4
// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[T_VAR2:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK1-NEXT:    store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK1-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK1-NEXT:    [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
// CHECK1-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK1-NEXT:    [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
// CHECK1-NEXT:    store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT:    store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT:    store i32 0, i32* [[T_VAR2]], align 4
// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK1-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
// CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1:       cond.true:
// CHECK1-NEXT:    br label [[COND_END:%.*]]
// CHECK1:       cond.false:
// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    br label [[COND_END]]
// CHECK1:       cond.end:
// CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
// CHECK1-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT:    store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1:       omp.inner.for.cond:
// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT:    [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK1-NEXT:    br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1:       omp.inner.for.body:
// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[I]], align 4
// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, i32* [[I]], align 4
// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4
// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
// CHECK1-NEXT:    store i32 [[ADD4]], i32* [[T_VAR2]], align 4
// CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1:       omp.body.continue:
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1:       omp.inner.for.inc:
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT:    store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK1:       omp.inner.for.end:
// CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1:       omp.loop.exit:
// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP15:%.*]] = bitcast i32* [[T_VAR2]] to i8*
// CHECK1-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
// CHECK1-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK1-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT:    ]
// CHECK1:       .omp.reduction.case1:
// CHECK1-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, i32* [[T_VAR2]], align 4
// CHECK1-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK1-NEXT:    store i32 [[ADD6]], i32* [[TMP0]], align 4
// CHECK1-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1:       .omp.reduction.case2:
// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, i32* [[T_VAR2]], align 4
// CHECK1-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1:       .omp.reduction.default:
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.5
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.6
// CHECK1-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
// CHECK1-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK1-NEXT:    ret void
//
//
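// Note (an assumption about the runtime contract, not something this test
// asserts): .omp_offloading.requires_reg registers the translation unit's
// OpenMP 'requires' flags with the offload runtime via
// __tgt_register_requires.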
// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK1-SAME: () #[[ATTR7:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK1-NEXT:    ret void
//
//
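// CHECK2 covers the -include-pch RUN line for the same source; its
// assertions are expected to mirror the CHECK1 block above.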
// CHECK2-LABEL: define {{[^@]+}}@main
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP1:%.*]] = bitcast i8** [[TMP0]] to i32**
// CHECK2-NEXT:    store i32* @_ZZ4mainE5sivar, i32** [[TMP1]], align 8
// CHECK2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
// CHECK2-NEXT:    store i32* @_ZZ4mainE5sivar, i32** [[TMP3]], align 8
// CHECK2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
// CHECK2-NEXT:    store i8* null, i8** [[TMP4]], align 8
// CHECK2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 2)
// CHECK2-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
// CHECK2-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK2-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK2:       omp_offload.failed:
// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(i32* @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK2:       omp_offload.cont:
// CHECK2-NEXT:    [[CALL:%.*]] = call signext i32 @_Z5tmainIiET_v()
// CHECK2-NEXT:    ret i32 [[CALL]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66
// CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[SIVAR1:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK2-NEXT:    store i32 0, i32* [[SIVAR1]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK2-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2:       cond.true:
// CHECK2-NEXT:    br label [[COND_END:%.*]]
// CHECK2:       cond.false:
// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT:    br label [[COND_END]]
// CHECK2:       cond.end:
// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK2-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2:       omp.inner.for.cond:
// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK2-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2:       omp.inner.for.body:
// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK2-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]])
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2:       omp.inner.for.inc:
// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK2:       omp.inner.for.end:
// CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2:       omp.loop.exit:
// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK2-NEXT:    [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8*
// CHECK2-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
// CHECK2-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK2-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK2-NEXT:    ]
// CHECK2:       .omp.reduction.case1:
// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK2-NEXT:    store i32 [[ADD3]], i32* [[TMP0]], align 4
// CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2:       .omp.reduction.case2:
// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK2-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK2:       .omp.reduction.default:
// CHECK2-NEXT:    ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT:  entry:
// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[SIVAR2:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT:    store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK2-NEXT:    store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK2-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
// CHECK2-NEXT:    [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
// CHECK2-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
// CHECK2-NEXT:    [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
// CHECK2-NEXT:    store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT:    store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT:    store i32 0, i32* [[SIVAR2]], align 4
// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
// CHECK2-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2:       cond.true:
// CHECK2-NEXT:    br label [[COND_END:%.*]]
// CHECK2:       cond.false:
// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    br label [[COND_END]]
// CHECK2:       cond.end:
// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
// CHECK2-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT:    store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2:       omp.inner.for.cond:
// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT:    [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK2-NEXT:    br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2:       omp.inner.for.body:
// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK2-NEXT:    store i32 [[ADD]], i32* [[I]], align 4
// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[I]], align 4
// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4
// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
// CHECK2-NEXT:    store i32 [[ADD4]], i32* [[SIVAR2]], align 4
// CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2:       omp.body.continue:
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2:       omp.inner.for.inc:
// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK2-NEXT:    store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK2:       omp.inner.for.end:
// CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
859 // CHECK2:       omp.loop.exit:
860 // CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
861 // CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
862 // CHECK2-NEXT:    [[TMP15:%.*]] = bitcast i32* [[SIVAR2]] to i8*
863 // CHECK2-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
864 // CHECK2-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
865 // CHECK2-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
866 // CHECK2-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
867 // CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
868 // CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
869 // CHECK2-NEXT:    ]
870 // CHECK2:       .omp.reduction.case1:
871 // CHECK2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
872 // CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* [[SIVAR2]], align 4
873 // CHECK2-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
874 // CHECK2-NEXT:    store i32 [[ADD6]], i32* [[TMP0]], align 4
875 // CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var)
876 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
877 // CHECK2:       .omp.reduction.case2:
878 // CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[SIVAR2]], align 4
879 // CHECK2-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
880 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
881 // CHECK2:       .omp.reduction.default:
882 // CHECK2-NEXT:    ret void
883 //
884 //
885 // CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
886 // CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
887 // CHECK2-NEXT:  entry:
888 // CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
889 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
890 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
891 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
892 // CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
893 // CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
894 // CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
895 // CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
896 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
897 // CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
898 // CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
899 // CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
900 // CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
901 // CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
902 // CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
903 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
904 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
905 // CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
906 // CHECK2-NEXT:    ret void
907 //
908 //
909 // CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
910 // CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
911 // CHECK2-NEXT:  entry:
912 // CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
913 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
914 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
915 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
916 // CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
917 // CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
918 // CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
919 // CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
920 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
921 // CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
922 // CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
923 // CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
924 // CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
925 // CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
926 // CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
927 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
928 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
929 // CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
930 // CHECK2-NEXT:    ret void
931 //
932 //
933 // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
934 // CHECK2-SAME: () #[[ATTR5:[0-9]+]] comdat {
935 // CHECK2-NEXT:  entry:
936 // CHECK2-NEXT:    [[T_VAR:%.*]] = alloca i32, align 4
937 // CHECK2-NEXT:    [[VEC:%.*]] = alloca [2 x i32], align 4
938 // CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
939 // CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
940 // CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
941 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
942 // CHECK2-NEXT:    store i32 0, i32* [[T_VAR]], align 4
943 // CHECK2-NEXT:    [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
944 // CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false)
945 // CHECK2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
946 // CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[TMP1]] to i32**
947 // CHECK2-NEXT:    store i32* [[T_VAR]], i32** [[TMP2]], align 8
948 // CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
949 // CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
950 // CHECK2-NEXT:    store i32* [[T_VAR]], i32** [[TMP4]], align 8
951 // CHECK2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
952 // CHECK2-NEXT:    store i8* null, i8** [[TMP5]], align 8
953 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
954 // CHECK2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
955 // CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i64 2)
956 // CHECK2-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.7, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
957 // CHECK2-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
958 // CHECK2-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
959 // CHECK2:       omp_offload.failed:
960 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32* [[T_VAR]]) #[[ATTR2]]
961 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
962 // CHECK2:       omp_offload.cont:
963 // CHECK2-NEXT:    ret i32 0
964 //
965 //
966 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32
967 // CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
968 // CHECK2-NEXT:  entry:
969 // CHECK2-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 8
970 // CHECK2-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8
971 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8
972 // CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP0]])
973 // CHECK2-NEXT:    ret void
974 //
975 //
976 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3
977 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
978 // CHECK2-NEXT:  entry:
979 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
980 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
981 // CHECK2-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 8
982 // CHECK2-NEXT:    [[T_VAR1:%.*]] = alloca i32, align 4
983 // CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
984 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
985 // CHECK2-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
986 // CHECK2-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
987 // CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
988 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
989 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
990 // CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
991 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
992 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
993 // CHECK2-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8
994 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8
995 // CHECK2-NEXT:    store i32 0, i32* [[T_VAR1]], align 4
996 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
997 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_COMB_UB]], align 4
998 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
999 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
1000 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
1001 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
1002 // CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
1003 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1004 // CHECK2-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
1005 // CHECK2-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1006 // CHECK2:       cond.true:
1007 // CHECK2-NEXT:    br label [[COND_END:%.*]]
1008 // CHECK2:       cond.false:
1009 // CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1010 // CHECK2-NEXT:    br label [[COND_END]]
1011 // CHECK2:       cond.end:
1012 // CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
1013 // CHECK2-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
1014 // CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
1015 // CHECK2-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
1016 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
1017 // CHECK2:       omp.inner.for.cond:
1018 // CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
1019 // CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1020 // CHECK2-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
1021 // CHECK2-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1022 // CHECK2:       omp.inner.for.body:
1023 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
1024 // CHECK2-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
1025 // CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1026 // CHECK2-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
1027 // CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[T_VAR1]])
1028 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
1029 // CHECK2:       omp.inner.for.inc:
1030 // CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
1031 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
1032 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
1033 // CHECK2-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
1034 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
1035 // CHECK2:       omp.inner.for.end:
1036 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
1037 // CHECK2:       omp.loop.exit:
1038 // CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
1039 // CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
1040 // CHECK2-NEXT:    [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8*
1041 // CHECK2-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
1042 // CHECK2-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
1043 // CHECK2-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var)
1044 // CHECK2-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
1045 // CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
1046 // CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
1047 // CHECK2-NEXT:    ]
1048 // CHECK2:       .omp.reduction.case1:
1049 // CHECK2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
1050 // CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4
1051 // CHECK2-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
1052 // CHECK2-NEXT:    store i32 [[ADD3]], i32* [[TMP0]], align 4
1053 // CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
1054 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
1055 // CHECK2:       .omp.reduction.case2:
1056 // CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4
1057 // CHECK2-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
1058 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
1059 // CHECK2:       .omp.reduction.default:
1060 // CHECK2-NEXT:    ret void
1061 //
1062 //
1063 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4
1064 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
1065 // CHECK2-NEXT:  entry:
1066 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1067 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1068 // CHECK2-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
1069 // CHECK2-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
1070 // CHECK2-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 8
1071 // CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
1072 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
1073 // CHECK2-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
1074 // CHECK2-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
1075 // CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1076 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1077 // CHECK2-NEXT:    [[T_VAR2:%.*]] = alloca i32, align 4
1078 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
1079 // CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
1080 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
1081 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
1082 // CHECK2-NEXT:    store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
1083 // CHECK2-NEXT:    store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
1084 // CHECK2-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8
1085 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8
1086 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
1087 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_UB]], align 4
1088 // CHECK2-NEXT:    [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
1089 // CHECK2-NEXT:    [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
1090 // CHECK2-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
1091 // CHECK2-NEXT:    [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
1092 // CHECK2-NEXT:    store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4
1093 // CHECK2-NEXT:    store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4
1094 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
1095 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
1096 // CHECK2-NEXT:    store i32 0, i32* [[T_VAR2]], align 4
1097 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
1098 // CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
1099 // CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
1100 // CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
1101 // CHECK2-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
1102 // CHECK2-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1103 // CHECK2:       cond.true:
1104 // CHECK2-NEXT:    br label [[COND_END:%.*]]
1105 // CHECK2:       cond.false:
1106 // CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
1107 // CHECK2-NEXT:    br label [[COND_END]]
1108 // CHECK2:       cond.end:
1109 // CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
1110 // CHECK2-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
1111 // CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
1112 // CHECK2-NEXT:    store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
1113 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
1114 // CHECK2:       omp.inner.for.cond:
1115 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
1116 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
1117 // CHECK2-NEXT:    [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
1118 // CHECK2-NEXT:    br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1119 // CHECK2:       omp.inner.for.body:
1120 // CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
1121 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
1122 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1123 // CHECK2-NEXT:    store i32 [[ADD]], i32* [[I]], align 4
1124 // CHECK2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[I]], align 4
1125 // CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4
1126 // CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
1127 // CHECK2-NEXT:    store i32 [[ADD4]], i32* [[T_VAR2]], align 4
1128 // CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
1129 // CHECK2:       omp.body.continue:
1130 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
1131 // CHECK2:       omp.inner.for.inc:
1132 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
1133 // CHECK2-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1
1134 // CHECK2-NEXT:    store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4
1135 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
1136 // CHECK2:       omp.inner.for.end:
1137 // CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
1138 // CHECK2:       omp.loop.exit:
1139 // CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
1140 // CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
1141 // CHECK2-NEXT:    [[TMP15:%.*]] = bitcast i32* [[T_VAR2]] to i8*
1142 // CHECK2-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
1143 // CHECK2-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
1144 // CHECK2-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var)
1145 // CHECK2-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
1146 // CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
1147 // CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
1148 // CHECK2-NEXT:    ]
1149 // CHECK2:       .omp.reduction.case1:
1150 // CHECK2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
1151 // CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* [[T_VAR2]], align 4
1152 // CHECK2-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
1153 // CHECK2-NEXT:    store i32 [[ADD6]], i32* [[TMP0]], align 4
1154 // CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var)
1155 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
1156 // CHECK2:       .omp.reduction.case2:
1157 // CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[T_VAR2]], align 4
1158 // CHECK2-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
1159 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
1160 // CHECK2:       .omp.reduction.default:
1161 // CHECK2-NEXT:    ret void
1162 //
1163 //
1164 // CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.5
1165 // CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
1166 // CHECK2-NEXT:  entry:
1167 // CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
1168 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
1169 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
1170 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
1171 // CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
1172 // CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
1173 // CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
1174 // CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
1175 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
1176 // CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
1177 // CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
1178 // CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
1179 // CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
1180 // CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
1181 // CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
1182 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
1183 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
1184 // CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
1185 // CHECK2-NEXT:    ret void
1186 //
1187 //
1188 // CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.6
1189 // CHECK2-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
1190 // CHECK2-NEXT:  entry:
1191 // CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
1192 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
1193 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
1194 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
1195 // CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
1196 // CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
1197 // CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
1198 // CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
1199 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
1200 // CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
1201 // CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
1202 // CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
1203 // CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
1204 // CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
1205 // CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
1206 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
1207 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
1208 // CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
1209 // CHECK2-NEXT:    ret void
1210 //
1211 //
1212 // CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
1213 // CHECK2-SAME: () #[[ATTR7:[0-9]+]] {
1214 // CHECK2-NEXT:  entry:
1215 // CHECK2-NEXT:    call void @__tgt_register_requires(i64 1)
1216 // CHECK2-NEXT:    ret void
1217 //
1218 //
1219 // CHECK3-LABEL: define {{[^@]+}}@main
1220 // CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
1221 // CHECK3-NEXT:  entry:
1222 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
1223 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 4
1224 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 4
1225 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 4
1226 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
1227 // CHECK3-NEXT:    store i32 0, i32* [[RETVAL]], align 4
1228 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1229 // CHECK3-NEXT:    [[TMP1:%.*]] = bitcast i8** [[TMP0]] to i32**
1230 // CHECK3-NEXT:    store i32* @_ZZ4mainE5sivar, i32** [[TMP1]], align 4
1231 // CHECK3-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1232 // CHECK3-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
1233 // CHECK3-NEXT:    store i32* @_ZZ4mainE5sivar, i32** [[TMP3]], align 4
1234 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
1235 // CHECK3-NEXT:    store i8* null, i8** [[TMP4]], align 4
1236 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1237 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1238 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 2)
1239 // CHECK3-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
1240 // CHECK3-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
1241 // CHECK3-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
1242 // CHECK3:       omp_offload.failed:
1243 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(i32* @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]]
1244 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
1245 // CHECK3:       omp_offload.cont:
1246 // CHECK3-NEXT:    [[CALL:%.*]] = call i32 @_Z5tmainIiET_v()
1247 // CHECK3-NEXT:    ret i32 [[CALL]]
1248 //
1249 //
1250 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66
1251 // CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] {
1252 // CHECK3-NEXT:  entry:
1253 // CHECK3-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 4
1254 // CHECK3-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4
1255 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4
1256 // CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
1257 // CHECK3-NEXT:    ret void
1258 //
1259 //
1260 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined.
1261 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
1262 // CHECK3-NEXT:  entry:
1263 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
1264 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
1265 // CHECK3-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 4
1266 // CHECK3-NEXT:    [[SIVAR1:%.*]] = alloca i32, align 4
1267 // CHECK3-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
1268 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
1269 // CHECK3-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
1270 // CHECK3-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
1271 // CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1272 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1273 // CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
1274 // CHECK3-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
1275 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
1276 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
1277 // CHECK3-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4
1278 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4
1279 // CHECK3-NEXT:    store i32 0, i32* [[SIVAR1]], align 4
1280 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
1281 // CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_COMB_UB]], align 4
1282 // CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
1283 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
1284 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1285 // CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
1286 // CHECK3-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
1287 // CHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1288 // CHECK3-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
1289 // CHECK3-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1290 // CHECK3:       cond.true:
1291 // CHECK3-NEXT:    br label [[COND_END:%.*]]
1292 // CHECK3:       cond.false:
1293 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1294 // CHECK3-NEXT:    br label [[COND_END]]
1295 // CHECK3:       cond.end:
1296 // CHECK3-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
1297 // CHECK3-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
1298 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
1299 // CHECK3-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
1300 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
1301 // CHECK3:       omp.inner.for.cond:
1302 // CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
1303 // CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1304 // CHECK3-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
1305 // CHECK3-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1306 // CHECK3:       omp.inner.for.body:
1307 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
1308 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
1309 // CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[SIVAR1]])
1310 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
1311 // CHECK3:       omp.inner.for.inc:
1312 // CHECK3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
1313 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
1314 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
1315 // CHECK3-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
1316 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
1317 // CHECK3:       omp.inner.for.end:
1318 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
1319 // CHECK3:       omp.loop.exit:
1320 // CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
1321 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
1322 // CHECK3-NEXT:    [[TMP13:%.*]] = bitcast i32* [[SIVAR1]] to i8*
1323 // CHECK3-NEXT:    store i8* [[TMP13]], i8** [[TMP12]], align 4
1324 // CHECK3-NEXT:    [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
1325 // CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
1326 // CHECK3-NEXT:    switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
1327 // CHECK3-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
1328 // CHECK3-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
1329 // CHECK3-NEXT:    ]
1330 // CHECK3:       .omp.reduction.case1:
1331 // CHECK3-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4
1332 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, i32* [[SIVAR1]], align 4
1333 // CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
1334 // CHECK3-NEXT:    store i32 [[ADD3]], i32* [[TMP0]], align 4
1335 // CHECK3-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
1336 // CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
1337 // CHECK3:       .omp.reduction.case2:
1338 // CHECK3-NEXT:    [[TMP18:%.*]] = load i32, i32* [[SIVAR1]], align 4
1339 // CHECK3-NEXT:    [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4
1340 // CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
1341 // CHECK3:       .omp.reduction.default:
1342 // CHECK3-NEXT:    ret void
1343 //
1344 //
1345 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1
1346 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
1347 // CHECK3-NEXT:  entry:
1348 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
1349 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
1350 // CHECK3-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
1351 // CHECK3-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
1352 // CHECK3-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 4
1353 // CHECK3-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
1354 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
1355 // CHECK3-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
1356 // CHECK3-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
1357 // CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
1358 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
1359 // CHECK3-NEXT:    [[SIVAR1:%.*]] = alloca i32, align 4
1360 // CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
1361 // CHECK3-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
1362 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
1363 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
1364 // CHECK3-NEXT:    store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
1365 // CHECK3-NEXT:    store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
1366 // CHECK3-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4
1367 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4
1368 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
1369 // CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_UB]], align 4
1370 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
1371 // CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
1372 // CHECK3-NEXT:    store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4
1373 // CHECK3-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4
1374 // CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
1375 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
1376 // CHECK3-NEXT:    store i32 0, i32* [[SIVAR1]], align 4
1377 // CHECK3-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1378 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
1379 // CHECK3-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
1380 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
1381 // CHECK3-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
1382 // CHECK3-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
1383 // CHECK3:       cond.true:
1384 // CHECK3-NEXT:    br label [[COND_END:%.*]]
1385 // CHECK3:       cond.false:
1386 // CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
1387 // CHECK3-NEXT:    br label [[COND_END]]
1388 // CHECK3:       cond.end:
1389 // CHECK3-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
1390 // CHECK3-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
1391 // CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
1392 // CHECK3-NEXT:    store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
1393 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
1394 // CHECK3:       omp.inner.for.cond:
1395 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
1396 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
1397 // CHECK3-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
1398 // CHECK3-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
1399 // CHECK3:       omp.inner.for.body:
1400 // CHECK3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
1401 // CHECK3-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
1402 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
1403 // CHECK3-NEXT:    store i32 [[ADD]], i32* [[I]], align 4
1404 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, i32* [[I]], align 4
1405 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[SIVAR1]], align 4
1406 // CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
1407 // CHECK3-NEXT:    store i32 [[ADD3]], i32* [[SIVAR1]], align 4
1408 // CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
1409 // CHECK3:       omp.body.continue:
1410 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
1411 // CHECK3:       omp.inner.for.inc:
1412 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
1413 // CHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
1414 // CHECK3-NEXT:    store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
1415 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
1416 // CHECK3:       omp.inner.for.end:
1417 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
1418 // CHECK3:       omp.loop.exit:
1419 // CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
1420 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
1421 // CHECK3-NEXT:    [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8*
1422 // CHECK3-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 4
1423 // CHECK3-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
1424 // CHECK3-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
1425 // CHECK3-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
1426 // CHECK3-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
1427 // CHECK3-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
1428 // CHECK3-NEXT:    ]
1429 // CHECK3:       .omp.reduction.case1:
1430 // CHECK3-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
1431 // CHECK3-NEXT:    [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4
1432 // CHECK3-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
1433 // CHECK3-NEXT:    store i32 [[ADD5]], i32* [[TMP0]], align 4
1434 // CHECK3-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var)
1435 // CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
1436 // CHECK3:       .omp.reduction.case2:
1437 // CHECK3-NEXT:    [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
1438 // CHECK3-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
1439 // CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
1440 // CHECK3:       .omp.reduction.default:
1441 // CHECK3-NEXT:    ret void
1442 //
1443 //
1444 // CHECK3-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
1445 // CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
1446 // CHECK3-NEXT:  entry:
1447 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
1448 // CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
1449 // CHECK3-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
1450 // CHECK3-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
1451 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
1452 // CHECK3-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
1453 // CHECK3-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
1454 // CHECK3-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
1455 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
1456 // CHECK3-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
1457 // CHECK3-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
1458 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
1459 // CHECK3-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
1460 // CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
1461 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
1462 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
1463 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
1464 // CHECK3-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
1465 // CHECK3-NEXT:    ret void
1466 //
1467 //
1468 // CHECK3-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
1469 // CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
1470 // CHECK3-NEXT:  entry:
1471 // CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
1472 // CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
1473 // CHECK3-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
1474 // CHECK3-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
1475 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
1476 // CHECK3-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
1477 // CHECK3-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
1478 // CHECK3-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
1479 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
1480 // CHECK3-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
1481 // CHECK3-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
1482 // CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
1483 // CHECK3-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
1484 // CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
1485 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
1486 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
1487 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
1488 // CHECK3-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
1489 // CHECK3-NEXT:    ret void
1490 //
1491 //
1492 // CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
1493 // CHECK3-SAME: () #[[ATTR5:[0-9]+]] comdat {
1494 // CHECK3-NEXT:  entry:
1495 // CHECK3-NEXT:    [[T_VAR:%.*]] = alloca i32, align 4
1496 // CHECK3-NEXT:    [[VEC:%.*]] = alloca [2 x i32], align 4
1497 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 4
1498 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 4
1499 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 4
1500 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
1501 // CHECK3-NEXT:    store i32 0, i32* [[T_VAR]], align 4
1502 // CHECK3-NEXT:    [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
1503 // CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false)
1504 // CHECK3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1505 // CHECK3-NEXT:    [[TMP2:%.*]] = bitcast i8** [[TMP1]] to i32**
1506 // CHECK3-NEXT:    store i32* [[T_VAR]], i32** [[TMP2]], align 4
1507 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1508 // CHECK3-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
1509 // CHECK3-NEXT:    store i32* [[T_VAR]], i32** [[TMP4]], align 4
1510 // CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
1511 // CHECK3-NEXT:    store i8* null, i8** [[TMP5]], align 4
1512 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
1513 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
1514 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i64 2)
1515 // CHECK3-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.7, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
1516 // CHECK3-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
1517 // CHECK3-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
1518 // CHECK3:       omp_offload.failed:
1519 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32* [[T_VAR]]) #[[ATTR2]]
1520 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
1521 // CHECK3:       omp_offload.cont:
1522 // CHECK3-NEXT:    ret i32 0
1523 //
1524 //
1525 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32
1526 // CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
1527 // CHECK3-NEXT:  entry:
1528 // CHECK3-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 4
1529 // CHECK3-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4
1530 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4
1531 // CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP0]])
1532 // CHECK3-NEXT:    ret void
1533 //
1534 //
1535 // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3
1536 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
1537 // CHECK3-NEXT:  entry:
1538 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
1539 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT:    [[T_VAR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[T_VAR1]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK3-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3:       cond.true:
// CHECK3-NEXT:    br label [[COND_END:%.*]]
// CHECK3:       cond.false:
// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT:    br label [[COND_END]]
// CHECK3:       cond.end:
// CHECK3-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK3-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3:       omp.inner.for.cond:
// CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3:       omp.inner.for.body:
// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[T_VAR1]])
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3:       omp.inner.for.inc:
// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
// CHECK3-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK3:       omp.inner.for.end:
// CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3:       omp.loop.exit:
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP13:%.*]] = bitcast i32* [[T_VAR1]] to i8*
// CHECK3-NEXT:    store i8* [[TMP13]], i8** [[TMP12]], align 4
// CHECK3-NEXT:    [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT:    switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT:    ]
// CHECK3:       .omp.reduction.case1:
// CHECK3-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK3-NEXT:    [[TMP17:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK3-NEXT:    store i32 [[ADD3]], i32* [[TMP0]], align 4
// CHECK3-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3:       .omp.reduction.case2:
// CHECK3-NEXT:    [[TMP18:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK3-NEXT:    [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3:       .omp.reduction.default:
// CHECK3-NEXT:    ret void
//
//
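// The function checked next is the innermost parallel-for region on the
// CHECK3 (i386) tmain path: it runs the chunk handed down by the distribute
// loop under a static schedule (id 34, presumably kmp_sch_static),
// accumulates into the private copy T_VAR1, and finishes with the two-way
// reduction epilogue (case 1 adds under the reduction critical section,
// case 2 uses a monotonic atomicrmw).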
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK3-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[T_VAR1:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
// CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT:    store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK3-NEXT:    store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK3-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4
// CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT:    store i32 0, i32* [[T_VAR1]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK3-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
// CHECK3-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3:       cond.true:
// CHECK3-NEXT:    br label [[COND_END:%.*]]
// CHECK3:       cond.false:
// CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    br label [[COND_END]]
// CHECK3:       cond.end:
// CHECK3-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
// CHECK3-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT:    store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3:       omp.inner.for.cond:
// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK3-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3:       omp.inner.for.body:
// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT:    store i32 [[ADD]], i32* [[I]], align 4
// CHECK3-NEXT:    [[TMP11:%.*]] = load i32, i32* [[I]], align 4
// CHECK3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
// CHECK3-NEXT:    store i32 [[ADD3]], i32* [[T_VAR1]], align 4
// CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3:       omp.body.continue:
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3:       omp.inner.for.inc:
// CHECK3-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK3-NEXT:    store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK3:       omp.inner.for.end:
// CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3:       omp.loop.exit:
// CHECK3-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8*
// CHECK3-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 4
// CHECK3-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK3-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT:    ]
// CHECK3:       .omp.reduction.case1:
// CHECK3-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK3-NEXT:    [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK3-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK3-NEXT:    store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK3-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3:       .omp.reduction.case2:
// CHECK3-NEXT:    [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK3-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3:       .omp.reduction.default:
// CHECK3-NEXT:    ret void
//
//
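// The two reduction callbacks below share one shape: each i8* argument is
// really a [1 x i8*] list of reduction items, and the callback folds the
// single i32 item of the right-hand list into the matching slot of the
// left-hand one. Clang emits one callback per reduction region, hence the
// .5/.6 pair with identical bodies.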
// CHECK3-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.5
// CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
// CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
// CHECK3-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
// CHECK3-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
// CHECK3-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK3-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
// CHECK3-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK3-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
// CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK3-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK3-NEXT:    ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.6
// CHECK3-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
// CHECK3-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
// CHECK3-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
// CHECK3-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
// CHECK3-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
// CHECK3-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK3-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
// CHECK3-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK3-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
// CHECK3-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
// CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK3-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK3-NEXT:    ret void
//
//
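// Per-TU registration stub; the i64 1 argument carries the OpenMP 'requires'
// flags (1 appears to be the runtime's OMP_REQ_NONE encoding).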
// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK3-SAME: () #[[ATTR7:[0-9]+]] {
// CHECK3-NEXT:  entry:
// CHECK3-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK3-NEXT:    ret void
//
//
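// CHECK4 re-checks the same 32-bit lowering in the second i386 run, so the
// assertions below mirror the CHECK3 set (align 4, i32 GEP indices, and a
// 4-byte reduce-data size).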
// CHECK4-LABEL: define {{[^@]+}}@main
// CHECK4-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP1:%.*]] = bitcast i8** [[TMP0]] to i32**
// CHECK4-NEXT:    store i32* @_ZZ4mainE5sivar, i32** [[TMP1]], align 4
// CHECK4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
// CHECK4-NEXT:    store i32* @_ZZ4mainE5sivar, i32** [[TMP3]], align 4
// CHECK4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK4-NEXT:    store i8* null, i8** [[TMP4]], align 4
// CHECK4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK4-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 2)
// CHECK4-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
// CHECK4-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK4-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK4:       omp_offload.failed:
// CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(i32* @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]]
// CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK4:       omp_offload.cont:
// CHECK4-NEXT:    [[CALL:%.*]] = call i32 @_Z5tmainIiET_v()
// CHECK4-NEXT:    ret i32 [[CALL]]
//
//
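// Offload entry for the main target region: it stashes the sivar pointer and
// immediately hands it to __kmpc_fork_teams.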
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66
// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4
// CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4
// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK4-NEXT:    ret void
//
//
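// Teams-level outlined function: the distribute loop runs under schedule id
// 92 (likely kmp_distribute_static), the upper bound is clamped at 1 since
// the loop has two iterations, and each chunk forks @.omp_outlined..1 with
// the bounds plus the private SIVAR1 reduction copy.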
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[SIVAR1:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK4-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4
// CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[SIVAR1]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK4-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4:       cond.true:
// CHECK4-NEXT:    br label [[COND_END:%.*]]
// CHECK4:       cond.false:
// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    br label [[COND_END]]
// CHECK4:       cond.end:
// CHECK4-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK4-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4:       omp.inner.for.cond:
// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK4-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4:       omp.inner.for.body:
// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[SIVAR1]])
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4:       omp.inner.for.inc:
// CHECK4-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
// CHECK4-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK4:       omp.inner.for.end:
// CHECK4-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4:       omp.loop.exit:
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP13:%.*]] = bitcast i32* [[SIVAR1]] to i8*
// CHECK4-NEXT:    store i8* [[TMP13]], i8** [[TMP12]], align 4
// CHECK4-NEXT:    [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK4-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT:    switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK4-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK4-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK4-NEXT:    ]
// CHECK4:       .omp.reduction.case1:
// CHECK4-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK4-NEXT:    [[TMP17:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK4-NEXT:    store i32 [[ADD3]], i32* [[TMP0]], align 4
// CHECK4-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4:       .omp.reduction.case2:
// CHECK4-NEXT:    [[TMP18:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK4-NEXT:    [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4:       .omp.reduction.default:
// CHECK4-NEXT:    ret void
//
//
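// Parallel-for outlined body: it rebuilds its bounds from the distribute
// chunk, computes i = 0 + iv*1, adds it into the private SIVAR1, and then
// runs the same reduction epilogue as the teams level.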
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[SIVAR1:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK4-NEXT:    store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK4-NEXT:    store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK4-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4
// CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[SIVAR1]], align 4
// CHECK4-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
// CHECK4-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4:       cond.true:
// CHECK4-NEXT:    br label [[COND_END:%.*]]
// CHECK4:       cond.false:
// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    br label [[COND_END]]
// CHECK4:       cond.end:
// CHECK4-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
// CHECK4-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT:    store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4:       omp.inner.for.cond:
// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK4-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4:       omp.inner.for.body:
// CHECK4-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK4-NEXT:    store i32 [[ADD]], i32* [[I]], align 4
// CHECK4-NEXT:    [[TMP11:%.*]] = load i32, i32* [[I]], align 4
// CHECK4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
// CHECK4-NEXT:    store i32 [[ADD3]], i32* [[SIVAR1]], align 4
// CHECK4-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4:       omp.body.continue:
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4:       omp.inner.for.inc:
// CHECK4-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK4-NEXT:    store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK4:       omp.inner.for.end:
// CHECK4-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4:       omp.loop.exit:
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8*
// CHECK4-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 4
// CHECK4-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK4-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK4-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK4-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK4-NEXT:    ]
// CHECK4:       .omp.reduction.case1:
// CHECK4-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK4-NEXT:    [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK4-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK4-NEXT:    store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK4-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4:       .omp.reduction.case2:
// CHECK4-NEXT:    [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK4-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4:       .omp.reduction.default:
// CHECK4-NEXT:    ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
// CHECK4-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
// CHECK4-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
// CHECK4-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
// CHECK4-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
// CHECK4-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK4-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
// CHECK4-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK4-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
// CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK4-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK4-NEXT:    ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
// CHECK4-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
// CHECK4-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
// CHECK4-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
// CHECK4-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
// CHECK4-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK4-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
// CHECK4-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK4-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
// CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK4-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK4-NEXT:    ret void
//
//
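// Template instantiation path (tmain<int>): the offload bookkeeping matches
// main, but the reduced variable is the local T_VAR and the region id names
// the _Z5tmainIiET_v_l32 entry.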
// CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
// CHECK4-SAME: () #[[ATTR5:[0-9]+]] comdat {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[T_VAR:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[VEC:%.*]] = alloca [2 x i32], align 4
// CHECK4-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    store i32 0, i32* [[T_VAR]], align 4
// CHECK4-NEXT:    [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false)
// CHECK4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP2:%.*]] = bitcast i8** [[TMP1]] to i32**
// CHECK4-NEXT:    store i32* [[T_VAR]], i32** [[TMP2]], align 4
// CHECK4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK4-NEXT:    store i32* [[T_VAR]], i32** [[TMP4]], align 4
// CHECK4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
// CHECK4-NEXT:    store i8* null, i8** [[TMP5]], align 4
// CHECK4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
// CHECK4-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i64 2)
// CHECK4-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.7, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
// CHECK4-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK4-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
// CHECK4:       omp_offload.failed:
// CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32* [[T_VAR]]) #[[ATTR2]]
// CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT]]
// CHECK4:       omp_offload.cont:
// CHECK4-NEXT:    ret i32 0
//
//
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32
// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4
// CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4
// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK4-NEXT:    ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[T_VAR1:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK4-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4
// CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[T_VAR1]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK4-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4:       cond.true:
// CHECK4-NEXT:    br label [[COND_END:%.*]]
// CHECK4:       cond.false:
// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    br label [[COND_END]]
// CHECK4:       cond.end:
// CHECK4-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK4-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4:       omp.inner.for.cond:
// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK4-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4:       omp.inner.for.body:
// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[T_VAR1]])
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4:       omp.inner.for.inc:
// CHECK4-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
// CHECK4-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK4:       omp.inner.for.end:
// CHECK4-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4:       omp.loop.exit:
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP13:%.*]] = bitcast i32* [[T_VAR1]] to i8*
// CHECK4-NEXT:    store i8* [[TMP13]], i8** [[TMP12]], align 4
// CHECK4-NEXT:    [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK4-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT:    switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK4-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK4-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK4-NEXT:    ]
// CHECK4:       .omp.reduction.case1:
// CHECK4-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK4-NEXT:    [[TMP17:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
// CHECK4-NEXT:    store i32 [[ADD3]], i32* [[TMP0]], align 4
// CHECK4-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4:       .omp.reduction.case2:
// CHECK4-NEXT:    [[TMP18:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK4-NEXT:    [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4
// CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4:       .omp.reduction.default:
// CHECK4-NEXT:    ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..4
// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[T_VAR_ADDR:%.*]] = alloca i32*, align 4
// CHECK4-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[T_VAR1:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
// CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
// CHECK4-NEXT:    store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK4-NEXT:    store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK4-NEXT:    store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4
// CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT:    store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT:    store i32 0, i32* [[T_VAR1]], align 4
// CHECK4-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK4-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
// CHECK4-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4:       cond.true:
// CHECK4-NEXT:    br label [[COND_END:%.*]]
// CHECK4:       cond.false:
// CHECK4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    br label [[COND_END]]
// CHECK4:       cond.end:
// CHECK4-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
// CHECK4-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT:    store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4:       omp.inner.for.cond:
// CHECK4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK4-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4:       omp.inner.for.body:
// CHECK4-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK4-NEXT:    store i32 [[ADD]], i32* [[I]], align 4
// CHECK4-NEXT:    [[TMP11:%.*]] = load i32, i32* [[I]], align 4
// CHECK4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
// CHECK4-NEXT:    store i32 [[ADD3]], i32* [[T_VAR1]], align 4
// CHECK4-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4:       omp.body.continue:
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4:       omp.inner.for.inc:
// CHECK4-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK4-NEXT:    store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK4:       omp.inner.for.end:
// CHECK4-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4:       omp.loop.exit:
// CHECK4-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
// CHECK4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8*
// CHECK4-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 4
// CHECK4-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK4-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK4-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK4-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK4-NEXT:    ]
// CHECK4:       .omp.reduction.case1:
// CHECK4-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK4-NEXT:    [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK4-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK4-NEXT:    store i32 [[ADD5]], i32* [[TMP0]], align 4
// CHECK4-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4:       .omp.reduction.case2:
// CHECK4-NEXT:    [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4
// CHECK4-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
// CHECK4-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4:       .omp.reduction.default:
// CHECK4-NEXT:    ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.5
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
// CHECK4-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
// CHECK4-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
// CHECK4-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
// CHECK4-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
// CHECK4-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK4-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
// CHECK4-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK4-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
// CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK4-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK4-NEXT:    ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.6
// CHECK4-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
// CHECK4-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
// CHECK4-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
// CHECK4-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
// CHECK4-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
// CHECK4-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
// CHECK4-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
// CHECK4-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
// CHECK4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
// CHECK4-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
// CHECK4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
// CHECK4-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
// CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
// CHECK4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
// CHECK4-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK4-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
// CHECK4-NEXT:    ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
// CHECK4-SAME: () #[[ATTR7:[0-9]+]] {
// CHECK4-NEXT:  entry:
// CHECK4-NEXT:    call void @__tgt_register_requires(i64 1)
// CHECK4-NEXT:    ret void
//
//
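// CHECK5 covers the LAMBDA build on a 64-bit target; main only invokes the
// lambda's call operator, and the target codegen is checked starting from
// the offload entry below.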
// CHECK5-LABEL: define {{[^@]+}}@main
// CHECK5-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK5-NEXT:  entry:
// CHECK5-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
// CHECK5-NEXT:    [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1
// CHECK5-NEXT:    store i32 0, i32* [[RETVAL]], align 4
// CHECK5-NEXT:    call void @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 1 dereferenceable(1) [[REF_TMP]])
// CHECK5-NEXT:    ret i32 0
//
//
// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44
// CHECK5-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK5-NEXT:  entry:
// CHECK5-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK5-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
// CHECK5-NEXT:    ret void
//
//
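// Same teams/distribute structure as the 32-bit runs, with the 64-bit
// differences visible below: pointers stored with align 8, the chunk bounds
// zext'd to i64 before __kmpc_fork_call, and an i64 8 reduce-data size.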
// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] {
// CHECK5-NEXT:  entry:
// CHECK5-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
// CHECK5-NEXT:    [[SIVAR1:%.*]] = alloca i32, align 4
// CHECK5-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
// CHECK5-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK5-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK5-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK5-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK5-NEXT:    [[I:%.*]] = alloca i32, align 4
// CHECK5-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
// CHECK5-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
// CHECK5-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
// CHECK5-NEXT:    store i32 0, i32* [[SIVAR1]], align 4
// CHECK5-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT:    store i32 1, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK5-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK5-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK5-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK5-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
// CHECK5-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK5:       cond.true:
// CHECK5-NEXT:    br label [[COND_END:%.*]]
// CHECK5:       cond.false:
// CHECK5-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT:    br label [[COND_END]]
// CHECK5:       cond.end:
// CHECK5-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
// CHECK5-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5:       omp.inner.for.cond:
// CHECK5-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK5-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5:       omp.inner.for.body:
// CHECK5-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
// CHECK5-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
// CHECK5-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
// CHECK5-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]])
// CHECK5-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5:       omp.inner.for.inc:
// CHECK5-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
// CHECK5-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
// CHECK5-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK5-NEXT:    br label [[OMP_INNER_FOR_COND]]
// CHECK5:       omp.inner.for.end:
// CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5:       omp.loop.exit:
// CHECK5-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
// CHECK5-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK5-NEXT:    [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8*
// CHECK5-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
// CHECK5-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK5-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
// CHECK5-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK5-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK5-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK5-NEXT:    ]
// CHECK5:       .omp.reduction.case1:
// CHECK5-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
// CHECK5-NEXT:    [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4
// CHECK5-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
2425 // CHECK5-NEXT:    store i32 [[ADD3]], i32* [[TMP0]], align 4
2426 // CHECK5-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
2427 // CHECK5-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
2428 // CHECK5:       .omp.reduction.case2:
2429 // CHECK5-NEXT:    [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
2430 // CHECK5-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
2431 // CHECK5-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
2432 // CHECK5:       .omp.reduction.default:
2433 // CHECK5-NEXT:    ret void
2434 //
2435 //
2436 // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1
2437 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] {
2438 // CHECK5-NEXT:  entry:
2439 // CHECK5-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
2440 // CHECK5-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
2441 // CHECK5-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
2442 // CHECK5-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
2443 // CHECK5-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
2444 // CHECK5-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
2445 // CHECK5-NEXT:    [[TMP:%.*]] = alloca i32, align 4
2446 // CHECK5-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
2447 // CHECK5-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
2448 // CHECK5-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2449 // CHECK5-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2450 // CHECK5-NEXT:    [[SIVAR2:%.*]] = alloca i32, align 4
2451 // CHECK5-NEXT:    [[I:%.*]] = alloca i32, align 4
2452 // CHECK5-NEXT:    [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
2453 // CHECK5-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
2454 // CHECK5-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
2455 // CHECK5-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
2456 // CHECK5-NEXT:    store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
2457 // CHECK5-NEXT:    store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
2458 // CHECK5-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
2459 // CHECK5-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
2460 // CHECK5-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
2461 // CHECK5-NEXT:    store i32 1, i32* [[DOTOMP_UB]], align 4
2462 // CHECK5-NEXT:    [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
2463 // CHECK5-NEXT:    [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
2464 // CHECK5-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
2465 // CHECK5-NEXT:    [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
2466 // CHECK5-NEXT:    store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4
2467 // CHECK5-NEXT:    store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4
2468 // CHECK5-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
2469 // CHECK5-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
2470 // CHECK5-NEXT:    store i32 0, i32* [[SIVAR2]], align 4
2471 // CHECK5-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
2472 // CHECK5-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
2473 // CHECK5-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
2474 // CHECK5-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
2475 // CHECK5-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
2476 // CHECK5-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2477 // CHECK5:       cond.true:
2478 // CHECK5-NEXT:    br label [[COND_END:%.*]]
2479 // CHECK5:       cond.false:
2480 // CHECK5-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
2481 // CHECK5-NEXT:    br label [[COND_END]]
2482 // CHECK5:       cond.end:
2483 // CHECK5-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
2484 // CHECK5-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
2485 // CHECK5-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
2486 // CHECK5-NEXT:    store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
2487 // CHECK5-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
2488 // CHECK5:       omp.inner.for.cond:
2489 // CHECK5-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
2490 // CHECK5-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
2491 // CHECK5-NEXT:    [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
2492 // CHECK5-NEXT:    br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2493 // CHECK5:       omp.inner.for.body:
2494 // CHECK5-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
2495 // CHECK5-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
2496 // CHECK5-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2497 // CHECK5-NEXT:    store i32 [[ADD]], i32* [[I]], align 4
2498 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, i32* [[I]], align 4
2499 // CHECK5-NEXT:    [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4
2500 // CHECK5-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
2501 // CHECK5-NEXT:    store i32 [[ADD4]], i32* [[SIVAR2]], align 4
2502 // CHECK5-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0
2503 // CHECK5-NEXT:    store i32* [[SIVAR2]], i32** [[TMP13]], align 8
2504 // CHECK5-NEXT:    call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(8) [[REF_TMP]])
2505 // CHECK5-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
2506 // CHECK5:       omp.body.continue:
2507 // CHECK5-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
2508 // CHECK5:       omp.inner.for.inc:
2509 // CHECK5-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
2510 // CHECK5-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1
2511 // CHECK5-NEXT:    store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4
2512 // CHECK5-NEXT:    br label [[OMP_INNER_FOR_COND]]
2513 // CHECK5:       omp.inner.for.end:
2514 // CHECK5-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
2515 // CHECK5:       omp.loop.exit:
2516 // CHECK5-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
2517 // CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
2518 // CHECK5-NEXT:    [[TMP16:%.*]] = bitcast i32* [[SIVAR2]] to i8*
2519 // CHECK5-NEXT:    store i8* [[TMP16]], i8** [[TMP15]], align 8
2520 // CHECK5-NEXT:    [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
2521 // CHECK5-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
2522 // CHECK5-NEXT:    switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
2523 // CHECK5-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
2524 // CHECK5-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
2525 // CHECK5-NEXT:    ]
2526 // CHECK5:       .omp.reduction.case1:
2527 // CHECK5-NEXT:    [[TMP19:%.*]] = load i32, i32* [[TMP0]], align 4
2528 // CHECK5-NEXT:    [[TMP20:%.*]] = load i32, i32* [[SIVAR2]], align 4
2529 // CHECK5-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
2530 // CHECK5-NEXT:    store i32 [[ADD6]], i32* [[TMP0]], align 4
2531 // CHECK5-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var)
2532 // CHECK5-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
2533 // CHECK5:       .omp.reduction.case2:
2534 // CHECK5-NEXT:    [[TMP21:%.*]] = load i32, i32* [[SIVAR2]], align 4
2535 // CHECK5-NEXT:    [[TMP22:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP21]] monotonic, align 4
2536 // CHECK5-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
2537 // CHECK5:       .omp.reduction.default:
2538 // CHECK5-NEXT:    ret void
2539 //
2540 //
2541 // CHECK5-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
2542 // CHECK5-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
2543 // CHECK5-NEXT:  entry:
2544 // CHECK5-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
2545 // CHECK5-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
2546 // CHECK5-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
2547 // CHECK5-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
2548 // CHECK5-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
2549 // CHECK5-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
2550 // CHECK5-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
2551 // CHECK5-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
2552 // CHECK5-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
2553 // CHECK5-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
2554 // CHECK5-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
2555 // CHECK5-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
2556 // CHECK5-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
2557 // CHECK5-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
2558 // CHECK5-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
2559 // CHECK5-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
2560 // CHECK5-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
2561 // CHECK5-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
2562 // CHECK5-NEXT:    ret void
2563 //
2564 //
2565 // CHECK5-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
2566 // CHECK5-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
2567 // CHECK5-NEXT:  entry:
2568 // CHECK5-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
2569 // CHECK5-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
2570 // CHECK5-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
2571 // CHECK5-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
2572 // CHECK5-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
2573 // CHECK5-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
2574 // CHECK5-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
2575 // CHECK5-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
2576 // CHECK5-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
2577 // CHECK5-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
2578 // CHECK5-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
2579 // CHECK5-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
2580 // CHECK5-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
2581 // CHECK5-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
2582 // CHECK5-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
2583 // CHECK5-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
2584 // CHECK5-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
2585 // CHECK5-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
2586 // CHECK5-NEXT:    ret void
2587 //
2588 //
2589 // CHECK5-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
2590 // CHECK5-SAME: () #[[ATTR6:[0-9]+]] {
2591 // CHECK5-NEXT:  entry:
2592 // CHECK5-NEXT:    call void @__tgt_register_requires(i64 1)
2593 // CHECK5-NEXT:    ret void
2594 //
2595 //
2596 // CHECK6-LABEL: define {{[^@]+}}@main
2597 // CHECK6-SAME: () #[[ATTR0:[0-9]+]] {
2598 // CHECK6-NEXT:  entry:
2599 // CHECK6-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
2600 // CHECK6-NEXT:    [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1
2601 // CHECK6-NEXT:    store i32 0, i32* [[RETVAL]], align 4
2602 // CHECK6-NEXT:    call void @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 1 dereferenceable(1) [[REF_TMP]])
2603 // CHECK6-NEXT:    ret i32 0
2604 //
2605 //
2606 // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44
2607 // CHECK6-SAME: (i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] {
2608 // CHECK6-NEXT:  entry:
2609 // CHECK6-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
2610 // CHECK6-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
2611 // CHECK6-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
2612 // CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]])
2613 // CHECK6-NEXT:    ret void
2614 //
2615 //
2616 // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined.
2617 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] {
2618 // CHECK6-NEXT:  entry:
2619 // CHECK6-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
2620 // CHECK6-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
2621 // CHECK6-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
2622 // CHECK6-NEXT:    [[SIVAR1:%.*]] = alloca i32, align 4
2623 // CHECK6-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
2624 // CHECK6-NEXT:    [[TMP:%.*]] = alloca i32, align 4
2625 // CHECK6-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
2626 // CHECK6-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
2627 // CHECK6-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2628 // CHECK6-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2629 // CHECK6-NEXT:    [[I:%.*]] = alloca i32, align 4
2630 // CHECK6-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
2631 // CHECK6-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
2632 // CHECK6-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
2633 // CHECK6-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
2634 // CHECK6-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
2635 // CHECK6-NEXT:    store i32 0, i32* [[SIVAR1]], align 4
2636 // CHECK6-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
2637 // CHECK6-NEXT:    store i32 1, i32* [[DOTOMP_COMB_UB]], align 4
2638 // CHECK6-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
2639 // CHECK6-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
2640 // CHECK6-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
2641 // CHECK6-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
2642 // CHECK6-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
2643 // CHECK6-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
2644 // CHECK6-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
2645 // CHECK6-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2646 // CHECK6:       cond.true:
2647 // CHECK6-NEXT:    br label [[COND_END:%.*]]
2648 // CHECK6:       cond.false:
2649 // CHECK6-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
2650 // CHECK6-NEXT:    br label [[COND_END]]
2651 // CHECK6:       cond.end:
2652 // CHECK6-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
2653 // CHECK6-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
2654 // CHECK6-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
2655 // CHECK6-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4
2656 // CHECK6-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
2657 // CHECK6:       omp.inner.for.cond:
2658 // CHECK6-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
2659 // CHECK6-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
2660 // CHECK6-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
2661 // CHECK6-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2662 // CHECK6:       omp.inner.for.body:
2663 // CHECK6-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
2664 // CHECK6-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
2665 // CHECK6-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
2666 // CHECK6-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
2667 // CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]])
2668 // CHECK6-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
2669 // CHECK6:       omp.inner.for.inc:
2670 // CHECK6-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
2671 // CHECK6-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
2672 // CHECK6-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
2673 // CHECK6-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
2674 // CHECK6-NEXT:    br label [[OMP_INNER_FOR_COND]]
2675 // CHECK6:       omp.inner.for.end:
2676 // CHECK6-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
2677 // CHECK6:       omp.loop.exit:
2678 // CHECK6-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
2679 // CHECK6-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
2680 // CHECK6-NEXT:    [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8*
2681 // CHECK6-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
2682 // CHECK6-NEXT:    [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
2683 // CHECK6-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
2684 // CHECK6-NEXT:    switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
2685 // CHECK6-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
2686 // CHECK6-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
2687 // CHECK6-NEXT:    ]
2688 // CHECK6:       .omp.reduction.case1:
2689 // CHECK6-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4
2690 // CHECK6-NEXT:    [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4
2691 // CHECK6-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
2692 // CHECK6-NEXT:    store i32 [[ADD3]], i32* [[TMP0]], align 4
2693 // CHECK6-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var)
2694 // CHECK6-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
2695 // CHECK6:       .omp.reduction.case2:
2696 // CHECK6-NEXT:    [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4
2697 // CHECK6-NEXT:    [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4
2698 // CHECK6-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
2699 // CHECK6:       .omp.reduction.default:
2700 // CHECK6-NEXT:    ret void
2701 //
2702 //
2703 // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..1
2704 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] {
2705 // CHECK6-NEXT:  entry:
2706 // CHECK6-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
2707 // CHECK6-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
2708 // CHECK6-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
2709 // CHECK6-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
2710 // CHECK6-NEXT:    [[SIVAR_ADDR:%.*]] = alloca i32*, align 8
2711 // CHECK6-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
2712 // CHECK6-NEXT:    [[TMP:%.*]] = alloca i32, align 4
2713 // CHECK6-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
2714 // CHECK6-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
2715 // CHECK6-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
2716 // CHECK6-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
2717 // CHECK6-NEXT:    [[SIVAR2:%.*]] = alloca i32, align 4
2718 // CHECK6-NEXT:    [[I:%.*]] = alloca i32, align 4
2719 // CHECK6-NEXT:    [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
2720 // CHECK6-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
2721 // CHECK6-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
2722 // CHECK6-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
2723 // CHECK6-NEXT:    store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8
2724 // CHECK6-NEXT:    store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8
2725 // CHECK6-NEXT:    store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8
2726 // CHECK6-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8
2727 // CHECK6-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
2728 // CHECK6-NEXT:    store i32 1, i32* [[DOTOMP_UB]], align 4
2729 // CHECK6-NEXT:    [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8
2730 // CHECK6-NEXT:    [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
2731 // CHECK6-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8
2732 // CHECK6-NEXT:    [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
2733 // CHECK6-NEXT:    store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4
2734 // CHECK6-NEXT:    store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4
2735 // CHECK6-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
2736 // CHECK6-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
2737 // CHECK6-NEXT:    store i32 0, i32* [[SIVAR2]], align 4
2738 // CHECK6-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
2739 // CHECK6-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
2740 // CHECK6-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
2741 // CHECK6-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
2742 // CHECK6-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
2743 // CHECK6-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
2744 // CHECK6:       cond.true:
2745 // CHECK6-NEXT:    br label [[COND_END:%.*]]
2746 // CHECK6:       cond.false:
2747 // CHECK6-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
2748 // CHECK6-NEXT:    br label [[COND_END]]
2749 // CHECK6:       cond.end:
2750 // CHECK6-NEXT:    [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
2751 // CHECK6-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
2752 // CHECK6-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
2753 // CHECK6-NEXT:    store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4
2754 // CHECK6-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
2755 // CHECK6:       omp.inner.for.cond:
2756 // CHECK6-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
2757 // CHECK6-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
2758 // CHECK6-NEXT:    [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
2759 // CHECK6-NEXT:    br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
2760 // CHECK6:       omp.inner.for.body:
2761 // CHECK6-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
2762 // CHECK6-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
2763 // CHECK6-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
2764 // CHECK6-NEXT:    store i32 [[ADD]], i32* [[I]], align 4
2765 // CHECK6-NEXT:    [[TMP11:%.*]] = load i32, i32* [[I]], align 4
2766 // CHECK6-NEXT:    [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4
2767 // CHECK6-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]]
2768 // CHECK6-NEXT:    store i32 [[ADD4]], i32* [[SIVAR2]], align 4
2769 // CHECK6-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0
2770 // CHECK6-NEXT:    store i32* [[SIVAR2]], i32** [[TMP13]], align 8
2771 // CHECK6-NEXT:    call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(8) [[REF_TMP]])
2772 // CHECK6-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
2773 // CHECK6:       omp.body.continue:
2774 // CHECK6-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
2775 // CHECK6:       omp.inner.for.inc:
2776 // CHECK6-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
2777 // CHECK6-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1
2778 // CHECK6-NEXT:    store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4
2779 // CHECK6-NEXT:    br label [[OMP_INNER_FOR_COND]]
2780 // CHECK6:       omp.inner.for.end:
2781 // CHECK6-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
2782 // CHECK6:       omp.loop.exit:
2783 // CHECK6-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
2784 // CHECK6-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
2785 // CHECK6-NEXT:    [[TMP16:%.*]] = bitcast i32* [[SIVAR2]] to i8*
2786 // CHECK6-NEXT:    store i8* [[TMP16]], i8** [[TMP15]], align 8
2787 // CHECK6-NEXT:    [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
2788 // CHECK6-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
2789 // CHECK6-NEXT:    switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
2790 // CHECK6-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
2791 // CHECK6-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
2792 // CHECK6-NEXT:    ]
2793 // CHECK6:       .omp.reduction.case1:
2794 // CHECK6-NEXT:    [[TMP19:%.*]] = load i32, i32* [[TMP0]], align 4
2795 // CHECK6-NEXT:    [[TMP20:%.*]] = load i32, i32* [[SIVAR2]], align 4
2796 // CHECK6-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
2797 // CHECK6-NEXT:    store i32 [[ADD6]], i32* [[TMP0]], align 4
2798 // CHECK6-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var)
2799 // CHECK6-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
2800 // CHECK6:       .omp.reduction.case2:
2801 // CHECK6-NEXT:    [[TMP21:%.*]] = load i32, i32* [[SIVAR2]], align 4
2802 // CHECK6-NEXT:    [[TMP22:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP21]] monotonic, align 4
2803 // CHECK6-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
2804 // CHECK6:       .omp.reduction.default:
2805 // CHECK6-NEXT:    ret void
2806 //
2807 //
2808 // CHECK6-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
2809 // CHECK6-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
2810 // CHECK6-NEXT:  entry:
2811 // CHECK6-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
2812 // CHECK6-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
2813 // CHECK6-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
2814 // CHECK6-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
2815 // CHECK6-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
2816 // CHECK6-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
2817 // CHECK6-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
2818 // CHECK6-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
2819 // CHECK6-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
2820 // CHECK6-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
2821 // CHECK6-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
2822 // CHECK6-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
2823 // CHECK6-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
2824 // CHECK6-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
2825 // CHECK6-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
2826 // CHECK6-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
2827 // CHECK6-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
2828 // CHECK6-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
2829 // CHECK6-NEXT:    ret void
2830 //
2831 //
2832 // CHECK6-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
2833 // CHECK6-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR4]] {
2834 // CHECK6-NEXT:  entry:
2835 // CHECK6-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
2836 // CHECK6-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
2837 // CHECK6-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
2838 // CHECK6-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
2839 // CHECK6-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
2840 // CHECK6-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
2841 // CHECK6-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
2842 // CHECK6-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
2843 // CHECK6-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
2844 // CHECK6-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
2845 // CHECK6-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
2846 // CHECK6-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
2847 // CHECK6-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
2848 // CHECK6-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
2849 // CHECK6-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
2850 // CHECK6-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
2851 // CHECK6-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
2852 // CHECK6-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
2853 // CHECK6-NEXT:    ret void
2854 //
2855 //
2856 // CHECK6-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
2857 // CHECK6-SAME: () #[[ATTR6:[0-9]+]] {
2858 // CHECK6-NEXT:  entry:
2859 // CHECK6-NEXT:    call void @__tgt_register_requires(i64 1)
2860 // CHECK6-NEXT:    ret void
2861 //
2862 //