1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s
3; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -slp-threshold=-10 | FileCheck %s --check-prefix=THRESHOLD
4
5@n = external local_unnamed_addr global i32, align 4
6@arr = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
7@arr1 = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
8@res = external local_unnamed_addr global float, align 4
9
10define float @baz() {
11; CHECK-LABEL: @baz(
12; CHECK-NEXT:  entry:
13; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
14; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
15; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
16; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
17; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
18; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
19; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
20; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]
21; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
22; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]
23; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
24; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
25; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
26; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
27; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
28; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
29; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
30; CHECK-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]]
31; CHECK-NEXT:    [[ADD19:%.*]] = fadd fast float [[TMP4]], [[ADD7]]
32; CHECK-NEXT:    [[ADD19_1:%.*]] = fadd fast float [[TMP5]], [[ADD19]]
33; CHECK-NEXT:    [[ADD19_2:%.*]] = fadd fast float [[TMP9]], [[ADD19_1]]
34; CHECK-NEXT:    [[ADD19_3:%.*]] = fadd fast float [[TMP10]], [[ADD19_2]]
35; CHECK-NEXT:    store float [[ADD19_3]], float* @res, align 4
36; CHECK-NEXT:    ret float [[ADD19_3]]
37;
38; THRESHOLD-LABEL: @baz(
39; THRESHOLD-NEXT:  entry:
40; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
41; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
42; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
43; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
44; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
45; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
46; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
47; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]
48; THRESHOLD-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
49; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]
50; THRESHOLD-NEXT:    [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
51; THRESHOLD-NEXT:    [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
52; THRESHOLD-NEXT:    [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
53; THRESHOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
54; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
55; THRESHOLD-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
56; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
57; THRESHOLD-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]]
58; THRESHOLD-NEXT:    [[ADD19:%.*]] = fadd fast float [[TMP4]], [[ADD7]]
59; THRESHOLD-NEXT:    [[ADD19_1:%.*]] = fadd fast float [[TMP5]], [[ADD19]]
60; THRESHOLD-NEXT:    [[ADD19_2:%.*]] = fadd fast float [[TMP9]], [[ADD19_1]]
61; THRESHOLD-NEXT:    [[ADD19_3:%.*]] = fadd fast float [[TMP10]], [[ADD19_2]]
62; THRESHOLD-NEXT:    store float [[ADD19_3]], float* @res, align 4
63; THRESHOLD-NEXT:    ret float [[ADD19_3]]
64;
65entry:
66  %0 = load i32, i32* @n, align 4
67  %mul = mul nsw i32 %0, 3
68  %conv = sitofp i32 %mul to float
69  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
70  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
71  %mul4 = fmul fast float %2, %1
72  %add = fadd fast float %mul4, %conv
73  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
74  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
75  %mul4.1 = fmul fast float %4, %3
76  %add.1 = fadd fast float %mul4.1, %add
77  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
78  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
79  %mul4.2 = fmul fast float %6, %5
80  %add.2 = fadd fast float %mul4.2, %add.1
81  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
82  %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
83  %mul4.3 = fmul fast float %8, %7
84  %add.3 = fadd fast float %mul4.3, %add.2
85  %add7 = fadd fast float %add.3, %conv
86  %add19 = fadd fast float %mul4, %add7
87  %add19.1 = fadd fast float %mul4.1, %add19
88  %add19.2 = fadd fast float %mul4.2, %add19.1
89  %add19.3 = fadd fast float %mul4.3, %add19.2
90  store float %add19.3, float* @res, align 4
91  ret float %add19.3
92}
93
94define float @bazz() {
95; CHECK-LABEL: @bazz(
96; CHECK-NEXT:  entry:
97; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
98; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
99; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
100; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
101; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
102; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
103; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
104; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
105; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
106; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
107; CHECK-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
108; CHECK-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
109; CHECK-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
110; CHECK-NEXT:    [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
111; CHECK-NEXT:    [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
112; CHECK-NEXT:    [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
113; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
114; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
115; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
116; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
117; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
118; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
119; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
120; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
121; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
122; CHECK-NEXT:    [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
123; CHECK-NEXT:    store float [[OP_EXTRA5]], float* @res, align 4
124; CHECK-NEXT:    ret float [[OP_EXTRA5]]
125;
126; THRESHOLD-LABEL: @bazz(
127; THRESHOLD-NEXT:  entry:
128; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
129; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
130; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
131; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
132; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
133; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
134; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
135; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
136; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
137; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
138; THRESHOLD-NEXT:    [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
139; THRESHOLD-NEXT:    [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
140; THRESHOLD-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
141; THRESHOLD-NEXT:    [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
142; THRESHOLD-NEXT:    [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
143; THRESHOLD-NEXT:    [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
144; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
145; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
146; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
147; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
148; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
149; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
150; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
151; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
152; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
153; THRESHOLD-NEXT:    [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
154; THRESHOLD-NEXT:    store float [[OP_EXTRA5]], float* @res, align 4
155; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
156;
157entry:
158  %0 = load i32, i32* @n, align 4
159  %mul = mul nsw i32 %0, 3
160  %conv = sitofp i32 %mul to float
161  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
162  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
163  %mul4 = fmul fast float %2, %1
164  %add = fadd fast float %mul4, %conv
165  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
166  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
167  %mul4.1 = fmul fast float %4, %3
168  %add.1 = fadd fast float %mul4.1, %add
169  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
170  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
171  %mul4.2 = fmul fast float %6, %5
172  %add.2 = fadd fast float %mul4.2, %add.1
173  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
174  %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
175  %mul4.3 = fmul fast float %8, %7
176  %add.3 = fadd fast float %mul4.3, %add.2
177  %mul5 = shl nsw i32 %0, 2
178  %conv6 = sitofp i32 %mul5 to float
179  %add7 = fadd fast float %add.3, %conv6
180  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4), align 16
181  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4), align 16
182  %mul18 = fmul fast float %10, %9
183  %add19 = fadd fast float %mul18, %add7
184  %11 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 5), align 4
185  %12 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 5), align 4
186  %mul18.1 = fmul fast float %12, %11
187  %add19.1 = fadd fast float %mul18.1, %add19
188  %13 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6), align 8
189  %14 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6), align 8
190  %mul18.2 = fmul fast float %14, %13
191  %add19.2 = fadd fast float %mul18.2, %add19.1
192  %15 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 7), align 4
193  %16 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 7), align 4
194  %mul18.3 = fmul fast float %16, %15
195  %add19.3 = fadd fast float %mul18.3, %add19.2
196  store float %add19.3, float* @res, align 4
197  ret float %add19.3
198}
199
200define float @bazzz() {
201; CHECK-LABEL: @bazzz(
202; CHECK-NEXT:  entry:
203; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
204; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
205; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
206; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
207; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
208; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
209; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
210; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
211; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
212; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
213; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
214; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
215; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
216; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
217; CHECK-NEXT:    store float [[TMP8]], float* @res, align 4
218; CHECK-NEXT:    ret float [[TMP8]]
219;
220; THRESHOLD-LABEL: @bazzz(
221; THRESHOLD-NEXT:  entry:
222; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
223; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
224; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
225; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
226; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
227; THRESHOLD-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
228; THRESHOLD-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
229; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
230; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
231; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
232; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
233; THRESHOLD-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
234; THRESHOLD-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
235; THRESHOLD-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
236; THRESHOLD-NEXT:    store float [[TMP8]], float* @res, align 4
237; THRESHOLD-NEXT:    ret float [[TMP8]]
238;
239entry:
240  %0 = load i32, i32* @n, align 4
241  %conv = sitofp i32 %0 to float
242  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
243  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
244  %mul = fmul fast float %2, %1
245  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
246  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
247  %mul.1 = fmul fast float %4, %3
248  %5 = fadd fast float %mul.1, %mul
249  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
250  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
251  %mul.2 = fmul fast float %7, %6
252  %8 = fadd fast float %mul.2, %5
253  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
254  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
255  %mul.3 = fmul fast float %10, %9
256  %11 = fadd fast float %mul.3, %8
257  %12 = fmul fast float %conv, %11
258  store float %12, float* @res, align 4
259  ret float %12
260}
261
262define i32 @foo() {
263; CHECK-LABEL: @foo(
264; CHECK-NEXT:  entry:
265; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
266; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
267; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
268; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
269; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
270; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
271; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
272; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
273; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
274; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
275; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
276; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
277; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
278; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
279; CHECK-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
280; CHECK-NEXT:    store i32 [[CONV4]], i32* @n, align 4
281; CHECK-NEXT:    ret i32 [[CONV4]]
282;
283; THRESHOLD-LABEL: @foo(
284; THRESHOLD-NEXT:  entry:
285; THRESHOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* @n, align 4
286; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
287; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
288; THRESHOLD-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
289; THRESHOLD-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
290; THRESHOLD-NEXT:    [[TMP4:%.*]] = fadd fast float undef, undef
291; THRESHOLD-NEXT:    [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
292; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
293; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
294; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
295; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
296; THRESHOLD-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
297; THRESHOLD-NEXT:    [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
298; THRESHOLD-NEXT:    [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
299; THRESHOLD-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
300; THRESHOLD-NEXT:    store i32 [[CONV4]], i32* @n, align 4
301; THRESHOLD-NEXT:    ret i32 [[CONV4]]
302;
303entry:
304  %0 = load i32, i32* @n, align 4
305  %conv = sitofp i32 %0 to float
306  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
307  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
308  %mul = fmul fast float %2, %1
309  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
310  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
311  %mul.1 = fmul fast float %4, %3
312  %5 = fadd fast float %mul.1, %mul
313  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
314  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
315  %mul.2 = fmul fast float %7, %6
316  %8 = fadd fast float %mul.2, %5
317  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
318  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
319  %mul.3 = fmul fast float %10, %9
320  %11 = fadd fast float %mul.3, %8
321  %12 = fmul fast float %conv, %11
322  %conv4 = fptosi float %12 to i32
323  store i32 %conv4, i32* @n, align 4
324  ret i32 %conv4
325}
326
327define float @bar() {
328; CHECK-LABEL: @bar(
329; CHECK-NEXT:  entry:
330; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
331; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
332; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
333; CHECK-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float undef, undef
334; CHECK-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef
335; CHECK-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef
336; CHECK-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef
337; CHECK-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef
338; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
339; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]]
340; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
341; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
342; CHECK-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
343; CHECK-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
344; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
345; CHECK-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef
346; CHECK-NEXT:    store float [[TMP3]], float* @res, align 4
347; CHECK-NEXT:    ret float [[TMP3]]
348;
349; THRESHOLD-LABEL: @bar(
350; THRESHOLD-NEXT:  entry:
351; THRESHOLD-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
352; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
353; THRESHOLD-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
354; THRESHOLD-NEXT:    [[CMP4:%.*]] = fcmp fast ogt float undef, undef
355; THRESHOLD-NEXT:    [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef
356; THRESHOLD-NEXT:    [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef
357; THRESHOLD-NEXT:    [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef
358; THRESHOLD-NEXT:    [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef
359; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
360; THRESHOLD-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]]
361; THRESHOLD-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
362; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
363; THRESHOLD-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
364; THRESHOLD-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
365; THRESHOLD-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
366; THRESHOLD-NEXT:    [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef
367; THRESHOLD-NEXT:    store float [[TMP3]], float* @res, align 4
368; THRESHOLD-NEXT:    ret float [[TMP3]]
369;
370entry:
371  %0 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
372  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
373  %mul = fmul fast float %1, %0
374  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
375  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
376  %mul3 = fmul fast float %3, %2
377  %cmp4 = fcmp fast ogt float %mul, %mul3
378  %max.0.mul3 = select i1 %cmp4, float %mul, float %mul3
379  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
380  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
381  %mul3.1 = fmul fast float %5, %4
382  %cmp4.1 = fcmp fast ogt float %max.0.mul3, %mul3.1
383  %max.0.mul3.1 = select i1 %cmp4.1, float %max.0.mul3, float %mul3.1
384  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
385  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
386  %mul3.2 = fmul fast float %7, %6
387  %cmp4.2 = fcmp fast ogt float %max.0.mul3.1, %mul3.2
388  %max.0.mul3.2 = select i1 %cmp4.2, float %max.0.mul3.1, float %mul3.2
389  store float %max.0.mul3.2, float* @res, align 4
390  ret float %max.0.mul3.2
391}
392
393define float @f(float* nocapture readonly %x) {
394; CHECK-LABEL: @f(
395; CHECK-NEXT:  entry:
396; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
397; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
398; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
399; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
400; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
401; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
402; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
403; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
404; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
405; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
406; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
407; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
408; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
409; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
410; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
411; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
412; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
413; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, undef
414; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
415; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
416; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
417; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
418; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
419; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
420; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
421; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
422; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
423; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
424; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
425; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
426; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
427; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
428; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
429; CHECK-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
430; CHECK-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
431; CHECK-NEXT:    [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
432; CHECK-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
433; CHECK-NEXT:    [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
434; CHECK-NEXT:    [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
435; CHECK-NEXT:    [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
436; CHECK-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
437; CHECK-NEXT:    [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
438; CHECK-NEXT:    [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
439; CHECK-NEXT:    [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
440; CHECK-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
441; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
442; CHECK-NEXT:    [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
443; CHECK-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
444; CHECK-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32
445; CHECK-NEXT:    [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33
446; CHECK-NEXT:    [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34
447; CHECK-NEXT:    [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35
448; CHECK-NEXT:    [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36
449; CHECK-NEXT:    [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37
450; CHECK-NEXT:    [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38
451; CHECK-NEXT:    [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39
452; CHECK-NEXT:    [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40
453; CHECK-NEXT:    [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41
454; CHECK-NEXT:    [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42
455; CHECK-NEXT:    [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43
456; CHECK-NEXT:    [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44
457; CHECK-NEXT:    [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45
458; CHECK-NEXT:    [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46
459; CHECK-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
460; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
461; CHECK-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
462; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
463; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
464; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
465; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
466; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
467; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
468; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
469; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
470; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
471; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
472; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
473; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
474; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
475; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
476; CHECK-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
477; CHECK-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
478; CHECK-NEXT:    [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
479; CHECK-NEXT:    [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
480; CHECK-NEXT:    [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
481; CHECK-NEXT:    [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
482; CHECK-NEXT:    [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
483; CHECK-NEXT:    [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
484; CHECK-NEXT:    [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
485; CHECK-NEXT:    [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
486; CHECK-NEXT:    [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
487; CHECK-NEXT:    [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
488; CHECK-NEXT:    [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
489; CHECK-NEXT:    [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
490; CHECK-NEXT:    [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
491; CHECK-NEXT:    [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
492; CHECK-NEXT:    [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
493; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
494; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]]
495; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
496; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]]
497; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
498; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
499; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
500; CHECK-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
501; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
502; CHECK-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
503; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
504; CHECK-NEXT:    [[RDX_SHUF9:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
505; CHECK-NEXT:    [[BIN_RDX10:%.*]] = fadd fast <16 x float> [[TMP1]], [[RDX_SHUF9]]
506; CHECK-NEXT:    [[RDX_SHUF11:%.*]] = shufflevector <16 x float> [[BIN_RDX10]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
507; CHECK-NEXT:    [[BIN_RDX12:%.*]] = fadd fast <16 x float> [[BIN_RDX10]], [[RDX_SHUF11]]
508; CHECK-NEXT:    [[RDX_SHUF13:%.*]] = shufflevector <16 x float> [[BIN_RDX12]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
509; CHECK-NEXT:    [[BIN_RDX14:%.*]] = fadd fast <16 x float> [[BIN_RDX12]], [[RDX_SHUF13]]
510; CHECK-NEXT:    [[RDX_SHUF15:%.*]] = shufflevector <16 x float> [[BIN_RDX14]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
511; CHECK-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]]
512; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0
513; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
514; CHECK-NEXT:    [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
515; CHECK-NEXT:    ret float [[OP_RDX]]
516;
517; THRESHOLD-LABEL: @f(
518; THRESHOLD-NEXT:  entry:
519; THRESHOLD-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
520; THRESHOLD-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
521; THRESHOLD-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
522; THRESHOLD-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
523; THRESHOLD-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
524; THRESHOLD-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
525; THRESHOLD-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
526; THRESHOLD-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
527; THRESHOLD-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
528; THRESHOLD-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
529; THRESHOLD-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
530; THRESHOLD-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
531; THRESHOLD-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
532; THRESHOLD-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
533; THRESHOLD-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
534; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
535; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
536; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, undef
537; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
538; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
539; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
540; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
541; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
542; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
543; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
544; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
545; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
546; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
547; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
548; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
549; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
550; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
551; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
552; THRESHOLD-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
553; THRESHOLD-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
554; THRESHOLD-NEXT:    [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
555; THRESHOLD-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
556; THRESHOLD-NEXT:    [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
557; THRESHOLD-NEXT:    [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
558; THRESHOLD-NEXT:    [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
559; THRESHOLD-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
560; THRESHOLD-NEXT:    [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
561; THRESHOLD-NEXT:    [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
562; THRESHOLD-NEXT:    [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
563; THRESHOLD-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
564; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
565; THRESHOLD-NEXT:    [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
566; THRESHOLD-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
567; THRESHOLD-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32
568; THRESHOLD-NEXT:    [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33
569; THRESHOLD-NEXT:    [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34
570; THRESHOLD-NEXT:    [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35
571; THRESHOLD-NEXT:    [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36
572; THRESHOLD-NEXT:    [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37
573; THRESHOLD-NEXT:    [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38
574; THRESHOLD-NEXT:    [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39
575; THRESHOLD-NEXT:    [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40
576; THRESHOLD-NEXT:    [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41
577; THRESHOLD-NEXT:    [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42
578; THRESHOLD-NEXT:    [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43
579; THRESHOLD-NEXT:    [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44
580; THRESHOLD-NEXT:    [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45
581; THRESHOLD-NEXT:    [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46
582; THRESHOLD-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
583; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
584; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
585; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
586; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
587; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
588; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
589; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
590; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
591; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
592; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
593; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
594; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
595; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
596; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
597; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
598; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
599; THRESHOLD-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
600; THRESHOLD-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
601; THRESHOLD-NEXT:    [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
602; THRESHOLD-NEXT:    [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
603; THRESHOLD-NEXT:    [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
604; THRESHOLD-NEXT:    [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
605; THRESHOLD-NEXT:    [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
606; THRESHOLD-NEXT:    [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
607; THRESHOLD-NEXT:    [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
608; THRESHOLD-NEXT:    [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
609; THRESHOLD-NEXT:    [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
610; THRESHOLD-NEXT:    [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
611; THRESHOLD-NEXT:    [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
612; THRESHOLD-NEXT:    [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
613; THRESHOLD-NEXT:    [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
614; THRESHOLD-NEXT:    [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
615; THRESHOLD-NEXT:    [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
616; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
617; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]]
618; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
619; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]]
620; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
621; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
622; THRESHOLD-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
623; THRESHOLD-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
624; THRESHOLD-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
625; THRESHOLD-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
626; THRESHOLD-NEXT:    [[TMP4:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
627; THRESHOLD-NEXT:    [[RDX_SHUF9:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
628; THRESHOLD-NEXT:    [[BIN_RDX10:%.*]] = fadd fast <16 x float> [[TMP1]], [[RDX_SHUF9]]
629; THRESHOLD-NEXT:    [[RDX_SHUF11:%.*]] = shufflevector <16 x float> [[BIN_RDX10]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
630; THRESHOLD-NEXT:    [[BIN_RDX12:%.*]] = fadd fast <16 x float> [[BIN_RDX10]], [[RDX_SHUF11]]
631; THRESHOLD-NEXT:    [[RDX_SHUF13:%.*]] = shufflevector <16 x float> [[BIN_RDX12]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
632; THRESHOLD-NEXT:    [[BIN_RDX14:%.*]] = fadd fast <16 x float> [[BIN_RDX12]], [[RDX_SHUF13]]
633; THRESHOLD-NEXT:    [[RDX_SHUF15:%.*]] = shufflevector <16 x float> [[BIN_RDX14]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
634; THRESHOLD-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]]
635; THRESHOLD-NEXT:    [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0
636; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
637; THRESHOLD-NEXT:    [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
638; THRESHOLD-NEXT:    ret float [[OP_RDX]]
639;
640  entry:
641  %0 = load float, float* %x, align 4
642  %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1
643  %1 = load float, float* %arrayidx.1, align 4
644  %add.1 = fadd fast float %1, %0
645  %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2
646  %2 = load float, float* %arrayidx.2, align 4
647  %add.2 = fadd fast float %2, %add.1
648  %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3
649  %3 = load float, float* %arrayidx.3, align 4
650  %add.3 = fadd fast float %3, %add.2
651  %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4
652  %4 = load float, float* %arrayidx.4, align 4
653  %add.4 = fadd fast float %4, %add.3
654  %arrayidx.5 = getelementptr inbounds float, float* %x, i64 5
655  %5 = load float, float* %arrayidx.5, align 4
656  %add.5 = fadd fast float %5, %add.4
657  %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6
658  %6 = load float, float* %arrayidx.6, align 4
659  %add.6 = fadd fast float %6, %add.5
660  %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7
661  %7 = load float, float* %arrayidx.7, align 4
662  %add.7 = fadd fast float %7, %add.6
663  %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8
664  %8 = load float, float* %arrayidx.8, align 4
665  %add.8 = fadd fast float %8, %add.7
666  %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9
667  %9 = load float, float* %arrayidx.9, align 4
668  %add.9 = fadd fast float %9, %add.8
669  %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10
670  %10 = load float, float* %arrayidx.10, align 4
671  %add.10 = fadd fast float %10, %add.9
672  %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11
673  %11 = load float, float* %arrayidx.11, align 4
674  %add.11 = fadd fast float %11, %add.10
675  %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12
676  %12 = load float, float* %arrayidx.12, align 4
677  %add.12 = fadd fast float %12, %add.11
678  %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13
679  %13 = load float, float* %arrayidx.13, align 4
680  %add.13 = fadd fast float %13, %add.12
681  %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14
682  %14 = load float, float* %arrayidx.14, align 4
683  %add.14 = fadd fast float %14, %add.13
684  %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15
685  %15 = load float, float* %arrayidx.15, align 4
686  %add.15 = fadd fast float %15, %add.14
687  %arrayidx.16 = getelementptr inbounds float, float* %x, i64 16
688  %16 = load float, float* %arrayidx.16, align 4
689  %add.16 = fadd fast float %16, %add.15
690  %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17
691  %17 = load float, float* %arrayidx.17, align 4
692  %add.17 = fadd fast float %17, %add.16
693  %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18
694  %18 = load float, float* %arrayidx.18, align 4
695  %add.18 = fadd fast float %18, %add.17
696  %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19
697  %19 = load float, float* %arrayidx.19, align 4
698  %add.19 = fadd fast float %19, %add.18
699  %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20
700  %20 = load float, float* %arrayidx.20, align 4
701  %add.20 = fadd fast float %20, %add.19
702  %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21
703  %21 = load float, float* %arrayidx.21, align 4
704  %add.21 = fadd fast float %21, %add.20
705  %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22
706  %22 = load float, float* %arrayidx.22, align 4
707  %add.22 = fadd fast float %22, %add.21
708  %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23
709  %23 = load float, float* %arrayidx.23, align 4
710  %add.23 = fadd fast float %23, %add.22
711  %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24
712  %24 = load float, float* %arrayidx.24, align 4
713  %add.24 = fadd fast float %24, %add.23
714  %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25
715  %25 = load float, float* %arrayidx.25, align 4
716  %add.25 = fadd fast float %25, %add.24
717  %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26
718  %26 = load float, float* %arrayidx.26, align 4
719  %add.26 = fadd fast float %26, %add.25
720  %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27
721  %27 = load float, float* %arrayidx.27, align 4
722  %add.27 = fadd fast float %27, %add.26
723  %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28
724  %28 = load float, float* %arrayidx.28, align 4
725  %add.28 = fadd fast float %28, %add.27
726  %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29
727  %29 = load float, float* %arrayidx.29, align 4
728  %add.29 = fadd fast float %29, %add.28
729  %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30
730  %30 = load float, float* %arrayidx.30, align 4
731  %add.30 = fadd fast float %30, %add.29
732  %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31
733  %31 = load float, float* %arrayidx.31, align 4
734  %add.31 = fadd fast float %31, %add.30
735  %arrayidx.32 = getelementptr inbounds float, float* %x, i64 32
736  %32 = load float, float* %arrayidx.32, align 4
737  %add.32 = fadd fast float %32, %add.31
738  %arrayidx.33 = getelementptr inbounds float, float* %x, i64 33
739  %33 = load float, float* %arrayidx.33, align 4
740  %add.33 = fadd fast float %33, %add.32
741  %arrayidx.34 = getelementptr inbounds float, float* %x, i64 34
742  %34 = load float, float* %arrayidx.34, align 4
743  %add.34 = fadd fast float %34, %add.33
744  %arrayidx.35 = getelementptr inbounds float, float* %x, i64 35
745  %35 = load float, float* %arrayidx.35, align 4
746  %add.35 = fadd fast float %35, %add.34
747  %arrayidx.36 = getelementptr inbounds float, float* %x, i64 36
748  %36 = load float, float* %arrayidx.36, align 4
749  %add.36 = fadd fast float %36, %add.35
750  %arrayidx.37 = getelementptr inbounds float, float* %x, i64 37
751  %37 = load float, float* %arrayidx.37, align 4
752  %add.37 = fadd fast float %37, %add.36
753  %arrayidx.38 = getelementptr inbounds float, float* %x, i64 38
754  %38 = load float, float* %arrayidx.38, align 4
755  %add.38 = fadd fast float %38, %add.37
756  %arrayidx.39 = getelementptr inbounds float, float* %x, i64 39
757  %39 = load float, float* %arrayidx.39, align 4
758  %add.39 = fadd fast float %39, %add.38
759  %arrayidx.40 = getelementptr inbounds float, float* %x, i64 40
760  %40 = load float, float* %arrayidx.40, align 4
761  %add.40 = fadd fast float %40, %add.39
762  %arrayidx.41 = getelementptr inbounds float, float* %x, i64 41
763  %41 = load float, float* %arrayidx.41, align 4
764  %add.41 = fadd fast float %41, %add.40
765  %arrayidx.42 = getelementptr inbounds float, float* %x, i64 42
766  %42 = load float, float* %arrayidx.42, align 4
767  %add.42 = fadd fast float %42, %add.41
768  %arrayidx.43 = getelementptr inbounds float, float* %x, i64 43
769  %43 = load float, float* %arrayidx.43, align 4
770  %add.43 = fadd fast float %43, %add.42
771  %arrayidx.44 = getelementptr inbounds float, float* %x, i64 44
772  %44 = load float, float* %arrayidx.44, align 4
773  %add.44 = fadd fast float %44, %add.43
774  %arrayidx.45 = getelementptr inbounds float, float* %x, i64 45
775  %45 = load float, float* %arrayidx.45, align 4
776  %add.45 = fadd fast float %45, %add.44
777  %arrayidx.46 = getelementptr inbounds float, float* %x, i64 46
778  %46 = load float, float* %arrayidx.46, align 4
779  %add.46 = fadd fast float %46, %add.45
780  %arrayidx.47 = getelementptr inbounds float, float* %x, i64 47
781  %47 = load float, float* %arrayidx.47, align 4
782  %add.47 = fadd fast float %47, %add.46
783  ret float %add.47
784}
785
786define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
787; CHECK-LABEL: @f1(
788; CHECK-NEXT:  entry:
789; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
790; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[REM]] to float
791; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
792; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
793; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
794; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
795; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
796; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
797; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
798; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
799; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
800; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
801; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
802; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
803; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
804; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
805; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
806; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
807; CHECK-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
808; CHECK-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
809; CHECK-NEXT:    [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
810; CHECK-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
811; CHECK-NEXT:    [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
812; CHECK-NEXT:    [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
813; CHECK-NEXT:    [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
814; CHECK-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
815; CHECK-NEXT:    [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
816; CHECK-NEXT:    [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
817; CHECK-NEXT:    [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
818; CHECK-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
819; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
820; CHECK-NEXT:    [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
821; CHECK-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
822; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
823; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
824; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
825; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
826; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
827; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
828; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
829; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
830; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
831; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
832; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
833; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
834; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
835; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
836; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
837; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
838; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
839; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
840; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
841; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
842; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
843; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
844; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
845; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
846; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
847; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
848; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
849; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
850; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
851; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
852; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
853; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
854; CHECK-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
855; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
856; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]]
857; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
858; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]]
859; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
860; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
861; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
862; CHECK-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
863; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
864; CHECK-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
865; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
866; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
867; CHECK-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
868; CHECK-NEXT:    ret float [[OP_EXTRA]]
869;
870; THRESHOLD-LABEL: @f1(
871; THRESHOLD-NEXT:  entry:
872; THRESHOLD-NEXT:    [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]]
873; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[REM]] to float
874; THRESHOLD-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
875; THRESHOLD-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
876; THRESHOLD-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
877; THRESHOLD-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
878; THRESHOLD-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
879; THRESHOLD-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
880; THRESHOLD-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
881; THRESHOLD-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
882; THRESHOLD-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
883; THRESHOLD-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
884; THRESHOLD-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
885; THRESHOLD-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
886; THRESHOLD-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
887; THRESHOLD-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
888; THRESHOLD-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
889; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
890; THRESHOLD-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
891; THRESHOLD-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
892; THRESHOLD-NEXT:    [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
893; THRESHOLD-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
894; THRESHOLD-NEXT:    [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
895; THRESHOLD-NEXT:    [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
896; THRESHOLD-NEXT:    [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
897; THRESHOLD-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
898; THRESHOLD-NEXT:    [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
899; THRESHOLD-NEXT:    [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
900; THRESHOLD-NEXT:    [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
901; THRESHOLD-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
902; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
903; THRESHOLD-NEXT:    [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
904; THRESHOLD-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
905; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
906; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
907; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float undef, [[CONV]]
908; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
909; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
910; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
911; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
912; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
913; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
914; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
915; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
916; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
917; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
918; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
919; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
920; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
921; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
922; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
923; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
924; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
925; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
926; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
927; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
928; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
929; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
930; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
931; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
932; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
933; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
934; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
935; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
936; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
937; THRESHOLD-NEXT:    [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
938; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
939; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]]
940; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
941; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]]
942; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
943; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
944; THRESHOLD-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
945; THRESHOLD-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
946; THRESHOLD-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
947; THRESHOLD-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
948; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
949; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
950; THRESHOLD-NEXT:    [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
951; THRESHOLD-NEXT:    ret float [[OP_EXTRA]]
952;
953  entry:
954  %rem = srem i32 %a, %b
955  %conv = sitofp i32 %rem to float
956  %0 = load float, float* %x, align 4
957  %add = fadd fast float %0, %conv
958  %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1
959  %1 = load float, float* %arrayidx.1, align 4
960  %add.1 = fadd fast float %1, %add
961  %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2
962  %2 = load float, float* %arrayidx.2, align 4
963  %add.2 = fadd fast float %2, %add.1
964  %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3
965  %3 = load float, float* %arrayidx.3, align 4
966  %add.3 = fadd fast float %3, %add.2
967  %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4
968  %4 = load float, float* %arrayidx.4, align 4
969  %add.4 = fadd fast float %4, %add.3
970  %arrayidx.5 = getelementptr inbounds float, float* %x, i64 5
971  %5 = load float, float* %arrayidx.5, align 4
972  %add.5 = fadd fast float %5, %add.4
973  %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6
974  %6 = load float, float* %arrayidx.6, align 4
975  %add.6 = fadd fast float %6, %add.5
976  %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7
977  %7 = load float, float* %arrayidx.7, align 4
978  %add.7 = fadd fast float %7, %add.6
979  %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8
980  %8 = load float, float* %arrayidx.8, align 4
981  %add.8 = fadd fast float %8, %add.7
982  %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9
983  %9 = load float, float* %arrayidx.9, align 4
984  %add.9 = fadd fast float %9, %add.8
985  %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10
986  %10 = load float, float* %arrayidx.10, align 4
987  %add.10 = fadd fast float %10, %add.9
988  %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11
989  %11 = load float, float* %arrayidx.11, align 4
990  %add.11 = fadd fast float %11, %add.10
991  %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12
992  %12 = load float, float* %arrayidx.12, align 4
993  %add.12 = fadd fast float %12, %add.11
994  %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13
995  %13 = load float, float* %arrayidx.13, align 4
996  %add.13 = fadd fast float %13, %add.12
997  %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14
998  %14 = load float, float* %arrayidx.14, align 4
999  %add.14 = fadd fast float %14, %add.13
1000  %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15
1001  %15 = load float, float* %arrayidx.15, align 4
1002  %add.15 = fadd fast float %15, %add.14
1003  %arrayidx.16 = getelementptr inbounds float, float* %x, i64 16
1004  %16 = load float, float* %arrayidx.16, align 4
1005  %add.16 = fadd fast float %16, %add.15
1006  %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17
1007  %17 = load float, float* %arrayidx.17, align 4
1008  %add.17 = fadd fast float %17, %add.16
1009  %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18
1010  %18 = load float, float* %arrayidx.18, align 4
1011  %add.18 = fadd fast float %18, %add.17
1012  %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19
1013  %19 = load float, float* %arrayidx.19, align 4
1014  %add.19 = fadd fast float %19, %add.18
1015  %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20
1016  %20 = load float, float* %arrayidx.20, align 4
1017  %add.20 = fadd fast float %20, %add.19
1018  %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21
1019  %21 = load float, float* %arrayidx.21, align 4
1020  %add.21 = fadd fast float %21, %add.20
1021  %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22
1022  %22 = load float, float* %arrayidx.22, align 4
1023  %add.22 = fadd fast float %22, %add.21
1024  %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23
1025  %23 = load float, float* %arrayidx.23, align 4
1026  %add.23 = fadd fast float %23, %add.22
1027  %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24
1028  %24 = load float, float* %arrayidx.24, align 4
1029  %add.24 = fadd fast float %24, %add.23
1030  %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25
1031  %25 = load float, float* %arrayidx.25, align 4
1032  %add.25 = fadd fast float %25, %add.24
1033  %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26
1034  %26 = load float, float* %arrayidx.26, align 4
1035  %add.26 = fadd fast float %26, %add.25
1036  %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27
1037  %27 = load float, float* %arrayidx.27, align 4
1038  %add.27 = fadd fast float %27, %add.26
1039  %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28
1040  %28 = load float, float* %arrayidx.28, align 4
1041  %add.28 = fadd fast float %28, %add.27
1042  %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29
1043  %29 = load float, float* %arrayidx.29, align 4
1044  %add.29 = fadd fast float %29, %add.28
1045  %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30
1046  %30 = load float, float* %arrayidx.30, align 4
1047  %add.30 = fadd fast float %30, %add.29
1048  %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31
1049  %31 = load float, float* %arrayidx.31, align 4
1050  %add.31 = fadd fast float %31, %add.30
1051  ret float %add.31
1052}
1053
1054define float @loadadd31(float* nocapture readonly %x) {
1055; CHECK-LABEL: @loadadd31(
1056; CHECK-NEXT:  entry:
1057; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1058; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
1059; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1060; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
1061; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
1062; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1063; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1064; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1065; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1066; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>*
1067; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
1068; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
1069; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
1070; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
1071; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
1072; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1073; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
1074; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
1075; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
1076; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
1077; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
1078; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
1079; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
1080; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>*
1081; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
1082; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
1083; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
1084; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
1085; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
1086; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
1087; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
1088; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
1089; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
1090; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
1091; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
1092; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
1093; CHECK-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
1094; CHECK-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
1095; CHECK-NEXT:    [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
1096; CHECK-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
1097; CHECK-NEXT:    [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
1098; CHECK-NEXT:    [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
1099; CHECK-NEXT:    [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
1100; CHECK-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
1101; CHECK-NEXT:    [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
1102; CHECK-NEXT:    [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
1103; CHECK-NEXT:    [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
1104; CHECK-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
1105; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
1106; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
1107; CHECK-NEXT:    [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
1108; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
1109; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
1110; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
1111; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
1112; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
1113; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
1114; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
1115; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
1116; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
1117; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
1118; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
1119; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
1120; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
1121; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
1122; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
1123; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1124; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]]
1125; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1126; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <16 x float> [[BIN_RDX]], [[RDX_SHUF1]]
1127; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <16 x float> [[BIN_RDX2]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1128; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <16 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
1129; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1130; CHECK-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
1131; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0
1132; CHECK-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1133; CHECK-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <8 x float> [[TMP5]], [[RDX_SHUF7]]
1134; CHECK-NEXT:    [[RDX_SHUF9:%.*]] = shufflevector <8 x float> [[BIN_RDX8]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1135; CHECK-NEXT:    [[BIN_RDX10:%.*]] = fadd fast <8 x float> [[BIN_RDX8]], [[RDX_SHUF9]]
1136; CHECK-NEXT:    [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[BIN_RDX10]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1137; CHECK-NEXT:    [[BIN_RDX12:%.*]] = fadd fast <8 x float> [[BIN_RDX10]], [[RDX_SHUF11]]
1138; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x float> [[BIN_RDX12]], i32 0
1139; CHECK-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
1140; CHECK-NEXT:    [[RDX_SHUF13:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1141; CHECK-NEXT:    [[BIN_RDX14:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF13]]
1142; CHECK-NEXT:    [[RDX_SHUF15:%.*]] = shufflevector <4 x float> [[BIN_RDX14]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1143; CHECK-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <4 x float> [[BIN_RDX14]], [[RDX_SHUF15]]
1144; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[BIN_RDX16]], i32 0
1145; CHECK-NEXT:    [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
1146; CHECK-NEXT:    [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]]
1147; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
1148; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
1149; CHECK-NEXT:    ret float [[TMP12]]
1150;
1151; THRESHOLD-LABEL: @loadadd31(
1152; THRESHOLD-NEXT:  entry:
1153; THRESHOLD-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1154; THRESHOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
1155; THRESHOLD-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1156; THRESHOLD-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
1157; THRESHOLD-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
1158; THRESHOLD-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1159; THRESHOLD-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1160; THRESHOLD-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1161; THRESHOLD-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1162; THRESHOLD-NEXT:    [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>*
1163; THRESHOLD-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
1164; THRESHOLD-NEXT:    [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
1165; THRESHOLD-NEXT:    [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
1166; THRESHOLD-NEXT:    [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
1167; THRESHOLD-NEXT:    [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
1168; THRESHOLD-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1169; THRESHOLD-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
1170; THRESHOLD-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
1171; THRESHOLD-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
1172; THRESHOLD-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
1173; THRESHOLD-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
1174; THRESHOLD-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
1175; THRESHOLD-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
1176; THRESHOLD-NEXT:    [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>*
1177; THRESHOLD-NEXT:    [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
1178; THRESHOLD-NEXT:    [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
1179; THRESHOLD-NEXT:    [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
1180; THRESHOLD-NEXT:    [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
1181; THRESHOLD-NEXT:    [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
1182; THRESHOLD-NEXT:    [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
1183; THRESHOLD-NEXT:    [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
1184; THRESHOLD-NEXT:    [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
1185; THRESHOLD-NEXT:    [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
1186; THRESHOLD-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
1187; THRESHOLD-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
1188; THRESHOLD-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
1189; THRESHOLD-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
1190; THRESHOLD-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
1191; THRESHOLD-NEXT:    [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
1192; THRESHOLD-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
1193; THRESHOLD-NEXT:    [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
1194; THRESHOLD-NEXT:    [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
1195; THRESHOLD-NEXT:    [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
1196; THRESHOLD-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
1197; THRESHOLD-NEXT:    [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
1198; THRESHOLD-NEXT:    [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
1199; THRESHOLD-NEXT:    [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
1200; THRESHOLD-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
1201; THRESHOLD-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
1202; THRESHOLD-NEXT:    [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
1203; THRESHOLD-NEXT:    [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
1204; THRESHOLD-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
1205; THRESHOLD-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
1206; THRESHOLD-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
1207; THRESHOLD-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
1208; THRESHOLD-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
1209; THRESHOLD-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
1210; THRESHOLD-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
1211; THRESHOLD-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
1212; THRESHOLD-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
1213; THRESHOLD-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
1214; THRESHOLD-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
1215; THRESHOLD-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
1216; THRESHOLD-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
1217; THRESHOLD-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
1218; THRESHOLD-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
1219; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1220; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]]
1221; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1222; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <16 x float> [[BIN_RDX]], [[RDX_SHUF1]]
1223; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <16 x float> [[BIN_RDX2]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1224; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <16 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
1225; THRESHOLD-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1226; THRESHOLD-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
1227; THRESHOLD-NEXT:    [[TMP8:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0
1228; THRESHOLD-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1229; THRESHOLD-NEXT:    [[BIN_RDX8:%.*]] = fadd fast <8 x float> [[TMP5]], [[RDX_SHUF7]]
1230; THRESHOLD-NEXT:    [[RDX_SHUF9:%.*]] = shufflevector <8 x float> [[BIN_RDX8]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1231; THRESHOLD-NEXT:    [[BIN_RDX10:%.*]] = fadd fast <8 x float> [[BIN_RDX8]], [[RDX_SHUF9]]
1232; THRESHOLD-NEXT:    [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[BIN_RDX10]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1233; THRESHOLD-NEXT:    [[BIN_RDX12:%.*]] = fadd fast <8 x float> [[BIN_RDX10]], [[RDX_SHUF11]]
1234; THRESHOLD-NEXT:    [[TMP9:%.*]] = extractelement <8 x float> [[BIN_RDX12]], i32 0
1235; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
1236; THRESHOLD-NEXT:    [[RDX_SHUF13:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1237; THRESHOLD-NEXT:    [[BIN_RDX14:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF13]]
1238; THRESHOLD-NEXT:    [[RDX_SHUF15:%.*]] = shufflevector <4 x float> [[BIN_RDX14]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1239; THRESHOLD-NEXT:    [[BIN_RDX16:%.*]] = fadd fast <4 x float> [[BIN_RDX14]], [[RDX_SHUF15]]
1240; THRESHOLD-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[BIN_RDX16]], i32 0
1241; THRESHOLD-NEXT:    [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
1242; THRESHOLD-NEXT:    [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]]
1243; THRESHOLD-NEXT:    [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
1244; THRESHOLD-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
1245; THRESHOLD-NEXT:    ret float [[TMP12]]
1246;
1247  entry:
1248  %arrayidx = getelementptr inbounds float, float* %x, i64 1
1249  %0 = load float, float* %arrayidx, align 4
1250  %arrayidx.1 = getelementptr inbounds float, float* %x, i64 2
1251  %1 = load float, float* %arrayidx.1, align 4
1252  %add.1 = fadd fast float %1, %0
1253  %arrayidx.2 = getelementptr inbounds float, float* %x, i64 3
1254  %2 = load float, float* %arrayidx.2, align 4
1255  %add.2 = fadd fast float %2, %add.1
1256  %arrayidx.3 = getelementptr inbounds float, float* %x, i64 4
1257  %3 = load float, float* %arrayidx.3, align 4
1258  %add.3 = fadd fast float %3, %add.2
1259  %arrayidx.4 = getelementptr inbounds float, float* %x, i64 5
1260  %4 = load float, float* %arrayidx.4, align 4
1261  %add.4 = fadd fast float %4, %add.3
1262  %arrayidx.5 = getelementptr inbounds float, float* %x, i64 6
1263  %5 = load float, float* %arrayidx.5, align 4
1264  %add.5 = fadd fast float %5, %add.4
1265  %arrayidx.6 = getelementptr inbounds float, float* %x, i64 7
1266  %6 = load float, float* %arrayidx.6, align 4
1267  %add.6 = fadd fast float %6, %add.5
1268  %arrayidx.7 = getelementptr inbounds float, float* %x, i64 8
1269  %7 = load float, float* %arrayidx.7, align 4
1270  %add.7 = fadd fast float %7, %add.6
1271  %arrayidx.8 = getelementptr inbounds float, float* %x, i64 9
1272  %8 = load float, float* %arrayidx.8, align 4
1273  %add.8 = fadd fast float %8, %add.7
1274  %arrayidx.9 = getelementptr inbounds float, float* %x, i64 10
1275  %9 = load float, float* %arrayidx.9, align 4
1276  %add.9 = fadd fast float %9, %add.8
1277  %arrayidx.10 = getelementptr inbounds float, float* %x, i64 11
1278  %10 = load float, float* %arrayidx.10, align 4
1279  %add.10 = fadd fast float %10, %add.9
1280  %arrayidx.11 = getelementptr inbounds float, float* %x, i64 12
1281  %11 = load float, float* %arrayidx.11, align 4
1282  %add.11 = fadd fast float %11, %add.10
1283  %arrayidx.12 = getelementptr inbounds float, float* %x, i64 13
1284  %12 = load float, float* %arrayidx.12, align 4
1285  %add.12 = fadd fast float %12, %add.11
1286  %arrayidx.13 = getelementptr inbounds float, float* %x, i64 14
1287  %13 = load float, float* %arrayidx.13, align 4
1288  %add.13 = fadd fast float %13, %add.12
1289  %arrayidx.14 = getelementptr inbounds float, float* %x, i64 15
1290  %14 = load float, float* %arrayidx.14, align 4
1291  %add.14 = fadd fast float %14, %add.13
1292  %arrayidx.15 = getelementptr inbounds float, float* %x, i64 16
1293  %15 = load float, float* %arrayidx.15, align 4
1294  %add.15 = fadd fast float %15, %add.14
1295  %arrayidx.16 = getelementptr inbounds float, float* %x, i64 17
1296  %16 = load float, float* %arrayidx.16, align 4
1297  %add.16 = fadd fast float %16, %add.15
1298  %arrayidx.17 = getelementptr inbounds float, float* %x, i64 18
1299  %17 = load float, float* %arrayidx.17, align 4
1300  %add.17 = fadd fast float %17, %add.16
1301  %arrayidx.18 = getelementptr inbounds float, float* %x, i64 19
1302  %18 = load float, float* %arrayidx.18, align 4
1303  %add.18 = fadd fast float %18, %add.17
1304  %arrayidx.19 = getelementptr inbounds float, float* %x, i64 20
1305  %19 = load float, float* %arrayidx.19, align 4
1306  %add.19 = fadd fast float %19, %add.18
1307  %arrayidx.20 = getelementptr inbounds float, float* %x, i64 21
1308  %20 = load float, float* %arrayidx.20, align 4
1309  %add.20 = fadd fast float %20, %add.19
1310  %arrayidx.21 = getelementptr inbounds float, float* %x, i64 22
1311  %21 = load float, float* %arrayidx.21, align 4
1312  %add.21 = fadd fast float %21, %add.20
1313  %arrayidx.22 = getelementptr inbounds float, float* %x, i64 23
1314  %22 = load float, float* %arrayidx.22, align 4
1315  %add.22 = fadd fast float %22, %add.21
1316  %arrayidx.23 = getelementptr inbounds float, float* %x, i64 24
1317  %23 = load float, float* %arrayidx.23, align 4
1318  %add.23 = fadd fast float %23, %add.22
1319  %arrayidx.24 = getelementptr inbounds float, float* %x, i64 25
1320  %24 = load float, float* %arrayidx.24, align 4
1321  %add.24 = fadd fast float %24, %add.23
1322  %arrayidx.25 = getelementptr inbounds float, float* %x, i64 26
1323  %25 = load float, float* %arrayidx.25, align 4
1324  %add.25 = fadd fast float %25, %add.24
1325  %arrayidx.26 = getelementptr inbounds float, float* %x, i64 27
1326  %26 = load float, float* %arrayidx.26, align 4
1327  %add.26 = fadd fast float %26, %add.25
1328  %arrayidx.27 = getelementptr inbounds float, float* %x, i64 28
1329  %27 = load float, float* %arrayidx.27, align 4
1330  %add.27 = fadd fast float %27, %add.26
1331  %arrayidx.28 = getelementptr inbounds float, float* %x, i64 29
1332  %28 = load float, float* %arrayidx.28, align 4
1333  %add.28 = fadd fast float %28, %add.27
1334  %arrayidx.29 = getelementptr inbounds float, float* %x, i64 30
1335  %29 = load float, float* %arrayidx.29, align 4
1336  %add.29 = fadd fast float %29, %add.28
1337  ret float %add.29
1338}
1339
1340define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
1341; CHECK-LABEL: @extra_args(
1342; CHECK-NEXT:  entry:
1343; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
1344; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
1345; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
1346; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1347; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1348; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1349; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1350; CHECK-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1351; CHECK-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1352; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1353; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
1354; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1355; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1356; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1357; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
1358; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
1359; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
1360; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1361; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
1362; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
1363; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1364; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
1365; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1366; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
1367; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1368; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
1369; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
1370; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
1371; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
1372; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
1373; CHECK-NEXT:    ret float [[OP_EXTRA5]]
1374;
1375; THRESHOLD-LABEL: @extra_args(
1376; THRESHOLD-NEXT:  entry:
1377; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
1378; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
1379; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
1380; THRESHOLD-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1381; THRESHOLD-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1382; THRESHOLD-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1383; THRESHOLD-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1384; THRESHOLD-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1385; THRESHOLD-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1386; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1387; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
1388; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1389; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1390; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1391; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
1392; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
1393; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
1394; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1395; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
1396; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
1397; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1398; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
1399; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1400; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
1401; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1402; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
1403; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
1404; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
1405; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
1406; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
1407; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
1408;
1409  entry:
1410  %mul = mul nsw i32 %b, %a
1411  %conv = sitofp i32 %mul to float
1412  %0 = load float, float* %x, align 4
1413  %add = fadd fast float %conv, 3.000000e+00
1414  %add1 = fadd fast float %0, %add
1415  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
1416  %1 = load float, float* %arrayidx3, align 4
1417  %add4 = fadd fast float %1, %add1
1418  %add5 = fadd fast float %add4, %conv
1419  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
1420  %2 = load float, float* %arrayidx3.1, align 4
1421  %add4.1 = fadd fast float %2, %add5
1422  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
1423  %3 = load float, float* %arrayidx3.2, align 4
1424  %add4.2 = fadd fast float %3, %add4.1
1425  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
1426  %4 = load float, float* %arrayidx3.3, align 4
1427  %add4.3 = fadd fast float %4, %add4.2
1428  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
1429  %5 = load float, float* %arrayidx3.4, align 4
1430  %add4.4 = fadd fast float %5, %add4.3
1431  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
1432  %6 = load float, float* %arrayidx3.5, align 4
1433  %add4.5 = fadd fast float %6, %add4.4
1434  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
1435  %7 = load float, float* %arrayidx3.6, align 4
1436  %add4.6 = fadd fast float %7, %add4.5
1437  ret float %add4.6
1438}
1439
1440define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a, i32 %b) {
1441; CHECK-LABEL: @extra_args_same_several_times(
1442; CHECK-NEXT:  entry:
1443; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
1444; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
1445; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
1446; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1447; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1448; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1449; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1450; CHECK-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1451; CHECK-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1452; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1453; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
1454; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1455; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1456; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1457; CHECK-NEXT:    [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00
1458; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
1459; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
1460; CHECK-NEXT:    [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00
1461; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
1462; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1463; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
1464; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
1465; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1466; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
1467; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1468; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
1469; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1470; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
1471; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
1472; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
1473; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
1474; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00
1475; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]]
1476; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
1477; CHECK-NEXT:    ret float [[OP_EXTRA7]]
1478;
1479; THRESHOLD-LABEL: @extra_args_same_several_times(
1480; THRESHOLD-NEXT:  entry:
1481; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
1482; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
1483; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
1484; THRESHOLD-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1485; THRESHOLD-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1486; THRESHOLD-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1487; THRESHOLD-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1488; THRESHOLD-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1489; THRESHOLD-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1490; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1491; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
1492; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1493; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1494; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1495; THRESHOLD-NEXT:    [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00
1496; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
1497; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
1498; THRESHOLD-NEXT:    [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00
1499; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
1500; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1501; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
1502; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
1503; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1504; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
1505; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1506; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
1507; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1508; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
1509; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
1510; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
1511; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
1512; THRESHOLD-NEXT:    [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00
1513; THRESHOLD-NEXT:    [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]]
1514; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
1515; THRESHOLD-NEXT:    ret float [[OP_EXTRA7]]
1516;
1517  entry:
1518  %mul = mul nsw i32 %b, %a
1519  %conv = sitofp i32 %mul to float
1520  %0 = load float, float* %x, align 4
1521  %add = fadd fast float %conv, 3.000000e+00
1522  %add1 = fadd fast float %0, %add
1523  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
1524  %1 = load float, float* %arrayidx3, align 4
1525  %add4 = fadd fast float %1, %add1
1526  %add41 = fadd fast float %add4, 5.000000e+00
1527  %add5 = fadd fast float %add41, %conv
1528  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
1529  %2 = load float, float* %arrayidx3.1, align 4
1530  %add4.1 = fadd fast float %2, %add5
1531  %add4.11 = fadd fast float %add4.1, 5.000000e+00
1532  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
1533  %3 = load float, float* %arrayidx3.2, align 4
1534  %add4.2 = fadd fast float %3, %add4.11
1535  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
1536  %4 = load float, float* %arrayidx3.3, align 4
1537  %add4.3 = fadd fast float %4, %add4.2
1538  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
1539  %5 = load float, float* %arrayidx3.4, align 4
1540  %add4.4 = fadd fast float %5, %add4.3
1541  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
1542  %6 = load float, float* %arrayidx3.5, align 4
1543  %add4.5 = fadd fast float %6, %add4.4
1544  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
1545  %7 = load float, float* %arrayidx3.6, align 4
1546  %add4.6 = fadd fast float %7, %add4.5
1547  ret float %add4.6
1548}
1549
1550define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b, i32 %c) {
1551; CHECK-LABEL: @extra_args_no_replace(
1552; CHECK-NEXT:  entry:
1553; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
1554; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
1555; CHECK-NEXT:    [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
1556; CHECK-NEXT:    [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
1557; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
1558; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1559; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1560; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1561; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1562; CHECK-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1563; CHECK-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1564; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1565; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
1566; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1567; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1568; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1569; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
1570; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
1571; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1572; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
1573; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
1574; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
1575; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1576; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
1577; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1578; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
1579; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1580; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
1581; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
1582; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
1583; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
1584; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
1585; CHECK-NEXT:    ret float [[OP_EXTRA5]]
1586;
1587; THRESHOLD-LABEL: @extra_args_no_replace(
1588; THRESHOLD-NEXT:  entry:
1589; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
1590; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
1591; THRESHOLD-NEXT:    [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
1592; THRESHOLD-NEXT:    [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
1593; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
1594; THRESHOLD-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
1595; THRESHOLD-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1596; THRESHOLD-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1597; THRESHOLD-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1598; THRESHOLD-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1599; THRESHOLD-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1600; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1601; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
1602; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1603; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1604; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1605; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
1606; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
1607; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1608; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
1609; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
1610; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
1611; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1612; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
1613; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1614; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
1615; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1616; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
1617; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
1618; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
1619; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
1620; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
1621; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
1622;
1623  entry:
1624  %mul = mul nsw i32 %b, %a
1625  %conv = sitofp i32 %mul to float
1626  %0 = load float, float* %x, align 4
1627  %convc = sitofp i32 %c to float
1628  %addc = fadd fast float %convc, 3.000000e+00
1629  %add = fadd fast float %conv, %addc
1630  %add1 = fadd fast float %0, %add
1631  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
1632  %1 = load float, float* %arrayidx3, align 4
1633  %add4 = fadd fast float %1, %add1
1634  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
1635  %2 = load float, float* %arrayidx3.1, align 4
1636  %add4.1 = fadd fast float %2, %add4
1637  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
1638  %3 = load float, float* %arrayidx3.2, align 4
1639  %add4.2 = fadd fast float %3, %add4.1
1640  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
1641  %4 = load float, float* %arrayidx3.3, align 4
1642  %add4.3 = fadd fast float %4, %add4.2
1643  %add5 = fadd fast float %add4.3, %conv
1644  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
1645  %5 = load float, float* %arrayidx3.4, align 4
1646  %add4.4 = fadd fast float %5, %add5
1647  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
1648  %6 = load float, float* %arrayidx3.5, align 4
1649  %add4.5 = fadd fast float %6, %add4.4
1650  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
1651  %7 = load float, float* %arrayidx3.6, align 4
1652  %add4.6 = fadd fast float %7, %add4.5
1653  ret float %add4.6
1654}
1655
1656define i32 @wobble(i32 %arg, i32 %bar) {
1657; CHECK-LABEL: @wobble(
1658; CHECK-NEXT:  bb:
1659; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[ARG:%.*]], i32 0
1660; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1
1661; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2
1662; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3
1663; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[BAR:%.*]], i32 0
1664; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1
1665; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2
1666; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3
1667; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]]
1668; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
1669; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
1670; CHECK-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
1671; CHECK-NEXT:    [[R1:%.*]] = add nuw i32 [[ARG]], undef
1672; CHECK-NEXT:    [[R2:%.*]] = add nsw i32 [[R1]], undef
1673; CHECK-NEXT:    [[R3:%.*]] = add nsw i32 [[R2]], undef
1674; CHECK-NEXT:    [[R4:%.*]] = add nsw i32 [[R3]], undef
1675; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1676; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
1677; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1678; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
1679; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
1680; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
1681; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
1682; CHECK-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
1683; CHECK-NEXT:    ret i32 [[OP_EXTRA3]]
1684;
1685; THRESHOLD-LABEL: @wobble(
1686; THRESHOLD-NEXT:  bb:
1687; THRESHOLD-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[ARG:%.*]], i32 0
1688; THRESHOLD-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1
1689; THRESHOLD-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2
1690; THRESHOLD-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3
1691; THRESHOLD-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[BAR:%.*]], i32 0
1692; THRESHOLD-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1
1693; THRESHOLD-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2
1694; THRESHOLD-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3
1695; THRESHOLD-NEXT:    [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]]
1696; THRESHOLD-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
1697; THRESHOLD-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
1698; THRESHOLD-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
1699; THRESHOLD-NEXT:    [[R1:%.*]] = add nuw i32 [[ARG]], undef
1700; THRESHOLD-NEXT:    [[R2:%.*]] = add nsw i32 [[R1]], undef
1701; THRESHOLD-NEXT:    [[R3:%.*]] = add nsw i32 [[R2]], undef
1702; THRESHOLD-NEXT:    [[R4:%.*]] = add nsw i32 [[R3]], undef
1703; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1704; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
1705; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1706; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
1707; THRESHOLD-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
1708; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
1709; THRESHOLD-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
1710; THRESHOLD-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
1711; THRESHOLD-NEXT:    ret i32 [[OP_EXTRA3]]
1712;
1713  bb:
1714  %x1 = xor i32 %arg, %bar
1715  %i1 = icmp eq i32 %x1, 0
1716  %s1 = sext i1 %i1 to i32
1717  %x2 = xor i32 %arg, %bar
1718  %i2 = icmp eq i32 %x2, 0
1719  %s2 = sext i1 %i2 to i32
1720  %x3 = xor i32 %arg, %bar
1721  %i3 = icmp eq i32 %x3, 0
1722  %s3 = sext i1 %i3 to i32
1723  %x4 = xor i32 %arg, %bar
1724  %i4 = icmp eq i32 %x4, 0
1725  %s4 = sext i1 %i4 to i32
1726  %r1 = add nuw i32 %arg, %s1
1727  %r2 = add nsw i32 %r1, %s2
1728  %r3 = add nsw i32 %r2, %s3
1729  %r4 = add nsw i32 %r3, %s4
1730  %r5 = add nsw i32 %r4, %x4
1731  ret i32 %r5
1732}
1733
1734