; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512

; Incremental updates of the instruction depths should be enough for this test
; case.
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=sse -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=avx -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=avx512vl -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
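;
; Illustrative note (hand-written, not autogenerated): the machine combiner turns the
; serial chain ((x0 + x1) + x2) + x3 into roughly (x0 + x1) + (x2 + x3), so the first
; two adds carry no dependence on each other. A sketch in IR terms, where %s0 is a
; hypothetical temporary used only for illustration:
;   %t0 = fadd reassoc nsz float %x0, %x1   ; independent of %s0
;   %s0 = fadd reassoc nsz float %x2, %x3   ; independent of %t0
;   %t2 = fadd reassoc nsz float %t0, %s0   ; combine the two partial sums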

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds1:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %t0, %x2
  %t2 = fadd reassoc nsz float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds2:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %x2, %t0
  %t2 = fadd reassoc nsz float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds3:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %t0, %x2
  %t2 = fadd reassoc nsz float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds4:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %x2, %t0
  %t2 = fadd reassoc nsz float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
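;
; Illustrative note (hand-written, not autogenerated): for the 8-operand chain below,
; the combiner only pairs up some of the adds, producing approximately
;   (((x0 + x1) + (x2 + x3)) + ((x4 + x5) + x6)) + x7
; rather than a fully balanced three-level tree over all eight inputs.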

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE:       # %bb.0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    addss %xmm5, %xmm4
; SSE-NEXT:    addss %xmm6, %xmm4
; SSE-NEXT:    addss %xmm4, %xmm0
; SSE-NEXT:    addss %xmm7, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds5:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %t0, %x2
  %t2 = fadd reassoc nsz float %t1, %x3
  %t3 = fadd reassoc nsz float %t2, %x4
  %t4 = fadd reassoc nsz float %t3, %x5
  %t5 = fadd reassoc nsz float %t4, %x6
  %t6 = fadd reassoc nsz float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
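;
; Illustrative note (hand-written, not autogenerated): with %t0 = %x0 / %x1 on the
; critical path, the profitable shape is (%x2 + %x3) + %t0, so the independent add
; overlaps with the division and only the final add has to wait for the divide result.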

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds6:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %x2, %t0
  %t2 = fadd reassoc nsz float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls1:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv reassoc nsz float %x0, %x1
  %t1 = fmul reassoc nsz float %x2, %t0
  %t2 = fmul reassoc nsz float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm3, %xmm2
; SSE-NEXT:    addsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv reassoc nsz double %x0, %x1
  %t1 = fadd reassoc nsz double %x2, %t0
  %t2 = fadd reassoc nsz double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    mulsd %xmm3, %xmm2
; SSE-NEXT:    mulsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv reassoc nsz double %x0, %x1
  %t1 = fmul reassoc nsz double %x2, %t0
  %t2 = fmul reassoc nsz double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    mulps %xmm1, %xmm0
; SSE-NEXT:    addps %xmm3, %xmm2
; SSE-NEXT:    addps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_adds_v4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vaddps %xmm3, %xmm2, %xmm1
; AVX1-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_adds_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    vaddps %xmm0, %xmm3, %xmm0
; AVX512-NEXT:    retq
  %t0 = fmul contract reassoc nsz <4 x float> %x0, %x1
  %t1 = fadd contract reassoc nsz <4 x float> %x2, %t0
  %t2 = fadd reassoc nsz <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    addpd %xmm3, %xmm2
; SSE-NEXT:    addpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_adds_v2f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
; AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_adds_v2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
; AVX512-NEXT:    retq
  %t0 = fmul contract reassoc nsz <2 x double> %x0, %x1
  %t1 = fadd contract reassoc nsz <2 x double> %x2, %t0
  %t2 = fadd reassoc nsz <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_muls_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    mulps %xmm3, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
  %t1 = fmul reassoc nsz <4 x float> %x2, %t0
  %t2 = fmul reassoc nsz <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_muls_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    mulpd %xmm3, %xmm2
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd reassoc nsz <2 x double> %x0, %x1
  %t1 = fmul reassoc nsz <2 x double> %x2, %t0
  %t2 = fmul reassoc nsz <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that AVX 256-bit vector single-precision adds are reassociated.

define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; SSE-LABEL: reassociate_adds_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    mulps %xmm2, %xmm0
; SSE-NEXT:    mulps %xmm3, %xmm1
; SSE-NEXT:    addps %xmm6, %xmm4
; SSE-NEXT:    addps %xmm4, %xmm0
; SSE-NEXT:    addps %xmm7, %xmm5
; SSE-NEXT:    addps %xmm5, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_adds_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vaddps %ymm3, %ymm2, %ymm1
; AVX1-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_adds_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; AVX512-NEXT:    vaddps %ymm0, %ymm3, %ymm0
; AVX512-NEXT:    retq
  %t0 = fmul contract reassoc nsz <8 x float> %x0, %x1
  %t1 = fadd contract reassoc nsz <8 x float> %x2, %t0
  %t2 = fadd reassoc nsz <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision adds are reassociated.

define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; SSE-LABEL: reassociate_adds_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    mulpd %xmm3, %xmm1
; SSE-NEXT:    addpd %xmm6, %xmm4
; SSE-NEXT:    addpd %xmm4, %xmm0
; SSE-NEXT:    addpd %xmm7, %xmm5
; SSE-NEXT:    addpd %xmm5, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_adds_v4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
; AVX1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_adds_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; AVX512-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
; AVX512-NEXT:    retq
  %t0 = fmul contract reassoc nsz <4 x double> %x0, %x1
  %t1 = fadd contract reassoc nsz <4 x double> %x2, %t0
  %t2 = fadd reassoc nsz <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX 256-bit vector single-precision multiplies are reassociated.

define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; SSE-LABEL: reassociate_muls_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm2, %xmm0
; SSE-NEXT:    addps %xmm3, %xmm1
; SSE-NEXT:    mulps %xmm6, %xmm4
; SSE-NEXT:    mulps %xmm4, %xmm0
; SSE-NEXT:    mulps %xmm7, %xmm5
; SSE-NEXT:    mulps %xmm5, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd reassoc nsz <8 x float> %x0, %x1
  %t1 = fmul reassoc nsz <8 x float> %x2, %t0
  %t2 = fmul reassoc nsz <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision multiplies are reassociated.

define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; SSE-LABEL: reassociate_muls_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm2, %xmm0
; SSE-NEXT:    addpd %xmm3, %xmm1
; SSE-NEXT:    mulpd %xmm6, %xmm4
; SSE-NEXT:    mulpd %xmm4, %xmm0
; SSE-NEXT:    mulpd %xmm7, %xmm5
; SSE-NEXT:    mulpd %xmm5, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd reassoc nsz <4 x double> %x0, %x1
  %t1 = fmul reassoc nsz <4 x double> %x2, %t0
  %t2 = fmul reassoc nsz <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX512 512-bit vector single-precision adds are reassociated.

define <16 x float> @reassociate_adds_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
; SSE-LABEL: reassociate_adds_v16f32:
; SSE:       # %bb.0:
; SSE-NEXT:    mulps %xmm4, %xmm0
; SSE-NEXT:    mulps %xmm5, %xmm1
; SSE-NEXT:    mulps %xmm6, %xmm2
; SSE-NEXT:    mulps %xmm7, %xmm3
; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_adds_v16f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vaddps %ymm6, %ymm4, %ymm2
; AVX1-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddps %ymm7, %ymm5, %ymm2
; AVX1-NEXT:    vaddps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_adds_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; AVX512-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; AVX512-NEXT:    retq
  %t0 = fmul contract reassoc nsz <16 x float> %x0, %x1
  %t1 = fadd contract reassoc nsz <16 x float> %x2, %t0
  %t2 = fadd reassoc nsz <16 x float> %x3, %t1
  ret <16 x float> %t2
}

; Verify that AVX512 512-bit vector double-precision adds are reassociated.

define <8 x double> @reassociate_adds_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
; SSE-LABEL: reassociate_adds_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    mulpd %xmm4, %xmm0
; SSE-NEXT:    mulpd %xmm5, %xmm1
; SSE-NEXT:    mulpd %xmm6, %xmm2
; SSE-NEXT:    mulpd %xmm7, %xmm3
; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_adds_v8f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vaddpd %ymm6, %ymm4, %ymm2
; AVX1-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddpd %ymm7, %ymm5, %ymm2
; AVX1-NEXT:    vaddpd %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_adds_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; AVX512-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; AVX512-NEXT:    retq
  %t0 = fmul contract reassoc nsz <8 x double> %x0, %x1
  %t1 = fadd contract reassoc nsz <8 x double> %x2, %t0
  %t2 = fadd reassoc nsz <8 x double> %x3, %t1
  ret <8 x double> %t2
}

; Verify that AVX512 512-bit vector single-precision multiplies are reassociated.

define <16 x float> @reassociate_muls_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
; SSE-LABEL: reassociate_muls_v16f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm4, %xmm0
; SSE-NEXT:    addps %xmm5, %xmm1
; SSE-NEXT:    addps %xmm6, %xmm2
; SSE-NEXT:    addps %xmm7, %xmm3
; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_muls_v16f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vmulps %ymm6, %ymm4, %ymm2
; AVX1-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmulps %ymm7, %ymm5, %ymm2
; AVX1-NEXT:    vmulps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_muls_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmulps %zmm3, %zmm2, %zmm1
; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t0 = fadd reassoc nsz <16 x float> %x0, %x1
  %t1 = fmul reassoc nsz <16 x float> %x2, %t0
  %t2 = fmul reassoc nsz <16 x float> %x3, %t1
  ret <16 x float> %t2
}

; Verify that AVX512 512-bit vector double-precision multiplies are reassociated.

define <8 x double> @reassociate_muls_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
; SSE-LABEL: reassociate_muls_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm4, %xmm0
; SSE-NEXT:    addpd %xmm5, %xmm1
; SSE-NEXT:    addpd %xmm6, %xmm2
; SSE-NEXT:    addpd %xmm7, %xmm3
; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_muls_v8f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vmulpd %ymm6, %ymm4, %ymm2
; AVX1-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmulpd %ymm7, %ymm5, %ymm2
; AVX1-NEXT:    vmulpd %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_muls_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmulpd %zmm3, %zmm2, %zmm1
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t0 = fadd reassoc nsz <8 x double> %x0, %x1
  %t1 = fmul reassoc nsz <8 x double> %x2, %t0
  %t2 = fmul reassoc nsz <8 x double> %x3, %t1
  ret <8 x double> %t2
}

; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.

define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_mins_single:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    minss %xmm3, %xmm2
; SSE-NEXT:    minss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_single:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp olt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp olt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.

define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_maxs_single:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    maxss %xmm3, %xmm2
; SSE-NEXT:    maxss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_single:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp ogt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp ogt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.

define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_mins_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    minsd %xmm3, %xmm2
; SSE-NEXT:    minsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp olt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp olt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.

define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_maxs_double:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    maxsd %xmm3, %xmm2
; SSE-NEXT:    maxsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_double:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp ogt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp ogt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.

define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_mins_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    minps %xmm3, %xmm2
; SSE-NEXT:    minps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp olt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp olt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.

define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    maxps %xmm3, %xmm2
; SSE-NEXT:    maxps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp ogt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp ogt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.

define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_mins_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    minpd %xmm3, %xmm2
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp olt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp olt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.

define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    maxpd %xmm3, %xmm2
; SSE-NEXT:    maxpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp ogt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp ogt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.

define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; SSE-LABEL: reassociate_mins_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm2, %xmm0
; SSE-NEXT:    addps %xmm3, %xmm1
; SSE-NEXT:    minps %xmm6, %xmm4
; SSE-NEXT:    minps %xmm4, %xmm0
; SSE-NEXT:    minps %xmm7, %xmm5
; SSE-NEXT:    minps %xmm5, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp olt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp olt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.

define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm2, %xmm0
; SSE-NEXT:    addps %xmm3, %xmm1
; SSE-NEXT:    maxps %xmm6, %xmm4
; SSE-NEXT:    maxps %xmm4, %xmm0
; SSE-NEXT:    maxps %xmm7, %xmm5
; SSE-NEXT:    maxps %xmm5, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp ogt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp ogt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.

define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; SSE-LABEL: reassociate_mins_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm2, %xmm0
; SSE-NEXT:    addpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm6, %xmm4
; SSE-NEXT:    minpd %xmm4, %xmm0
; SSE-NEXT:    minpd %xmm7, %xmm5
; SSE-NEXT:    minpd %xmm5, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp olt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp olt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.

define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm2, %xmm0
; SSE-NEXT:    addpd %xmm3, %xmm1
; SSE-NEXT:    maxpd %xmm6, %xmm4
; SSE-NEXT:    maxpd %xmm4, %xmm0
; SSE-NEXT:    maxpd %xmm7, %xmm5
; SSE-NEXT:    maxpd %xmm5, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp ogt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp ogt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX512 512-bit vector single-precision minimum ops are reassociated.

define <16 x float> @reassociate_mins_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
; SSE-LABEL: reassociate_mins_v16f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm4, %xmm0
; SSE-NEXT:    addps %xmm5, %xmm1
; SSE-NEXT:    addps %xmm6, %xmm2
; SSE-NEXT:    addps %xmm7, %xmm3
; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_mins_v16f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vminps %ymm6, %ymm4, %ymm2
; AVX1-NEXT:    vminps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vminps %ymm7, %ymm5, %ymm2
; AVX1-NEXT:    vminps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_mins_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vminps %zmm3, %zmm2, %zmm1
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t0 = fadd <16 x float> %x0, %x1
  %cmp1 = fcmp olt <16 x float> %x2, %t0
  %sel1 = select <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
  %cmp2 = fcmp olt <16 x float> %x3, %sel1
  %sel2 = select <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
  ret <16 x float> %sel2
}

; Verify that AVX512 512-bit vector single-precision maximum ops are reassociated.

define <16 x float> @reassociate_maxs_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v16f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addps %xmm4, %xmm0
; SSE-NEXT:    addps %xmm5, %xmm1
; SSE-NEXT:    addps %xmm6, %xmm2
; SSE-NEXT:    addps %xmm7, %xmm3
; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_maxs_v16f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vmaxps %ymm6, %ymm4, %ymm2
; AVX1-NEXT:    vmaxps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmaxps %ymm7, %ymm5, %ymm2
; AVX1-NEXT:    vmaxps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_maxs_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmaxps %zmm3, %zmm2, %zmm1
; AVX512-NEXT:    vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t0 = fadd <16 x float> %x0, %x1
  %cmp1 = fcmp ogt <16 x float> %x2, %t0
  %sel1 = select <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
  %cmp2 = fcmp ogt <16 x float> %x3, %sel1
  %sel2 = select <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
  ret <16 x float> %sel2
}

; Verify that AVX512 512-bit vector double-precision minimum ops are reassociated.

define <8 x double> @reassociate_mins_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
; SSE-LABEL: reassociate_mins_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm4, %xmm0
; SSE-NEXT:    addpd %xmm5, %xmm1
; SSE-NEXT:    addpd %xmm6, %xmm2
; SSE-NEXT:    addpd %xmm7, %xmm3
; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_mins_v8f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vminpd %ymm6, %ymm4, %ymm2
; AVX1-NEXT:    vminpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vminpd %ymm7, %ymm5, %ymm2
; AVX1-NEXT:    vminpd %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_mins_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vminpd %zmm3, %zmm2, %zmm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t0 = fadd <8 x double> %x0, %x1
  %cmp1 = fcmp olt <8 x double> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
  %cmp2 = fcmp olt <8 x double> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
  ret <8 x double> %sel2
}

; Verify that AVX512 512-bit vector double-precision maximum ops are reassociated.

define <8 x double> @reassociate_maxs_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm4, %xmm0
; SSE-NEXT:    addpd %xmm5, %xmm1
; SSE-NEXT:    addpd %xmm6, %xmm2
; SSE-NEXT:    addpd %xmm7, %xmm3
; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: reassociate_maxs_v8f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vaddpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vmaxpd %ymm6, %ymm4, %ymm2
; AVX1-NEXT:    vmaxpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vmaxpd %ymm7, %ymm5, %ymm2
; AVX1-NEXT:    vmaxpd %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX512-LABEL: reassociate_maxs_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmaxpd %zmm3, %zmm2, %zmm1
; AVX512-NEXT:    vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %t0 = fadd <8 x double> %x0, %x1
  %cmp1 = fcmp ogt <8 x double> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
  %cmp2 = fcmp ogt <8 x double> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
  ret <8 x double> %sel2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.
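;
; Illustrative note (hand-written, not autogenerated): the four call results below are
; only produced in program order, but their sums can still be paired as roughly
; (x0 + x1) + (x2 + x3) in reassociate_adds_from_calls, while already_reassociated is
; already in that shape and should be left as-is.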

declare double @bar()

define double @reassociate_adds_from_calls() {
; SSE-LABEL: reassociate_adds_from_calls:
; SSE:       # %bb.0:
; SSE-NEXT:    subq $24, %rsp
; SSE-NEXT:    .cfi_def_cfa_offset 32
; SSE-NEXT:    callq bar@PLT
; SSE-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; SSE-NEXT:    callq bar@PLT
; SSE-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; SSE-NEXT:    callq bar@PLT
; SSE-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; SSE-NEXT:    callq bar@PLT
; SSE-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; SSE-NEXT:    # xmm1 = mem[0],zero
; SSE-NEXT:    addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload
; SSE-NEXT:    addsd (%rsp), %xmm0 # 8-byte Folded Reload
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    addq $24, %rsp
; SSE-NEXT:    .cfi_def_cfa_offset 8
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_from_calls:
; AVX:       # %bb.0:
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    callq bar@PLT
; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX-NEXT:    callq bar@PLT
; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX-NEXT:    callq bar@PLT
; AVX-NEXT:    vmovsd %xmm0, (%rsp) # 8-byte Spill
; AVX-NEXT:    callq bar@PLT
; AVX-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; AVX-NEXT:    # xmm1 = mem[0],zero
; AVX-NEXT:    vaddsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 8-byte Folded Reload
; AVX-NEXT:    vaddsd (%rsp), %xmm0, %xmm0 # 8-byte Folded Reload
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd reassoc nsz double %x0, %x1
  %t1 = fadd reassoc nsz double %t0, %x2
  %t2 = fadd reassoc nsz double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; SSE-LABEL: already_reassociated:
; SSE:       # %bb.0:
; SSE-NEXT:    subq $24, %rsp
; SSE-NEXT:    .cfi_def_cfa_offset 32
; SSE-NEXT:    callq bar@PLT
; SSE-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; SSE-NEXT:    callq bar@PLT
; SSE-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; SSE-NEXT:    callq bar@PLT
; SSE-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; SSE-NEXT:    callq bar@PLT
; SSE-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; SSE-NEXT:    # xmm1 = mem[0],zero
; SSE-NEXT:    addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload
; SSE-NEXT:    addsd (%rsp), %xmm0 # 8-byte Folded Reload
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    addq $24, %rsp
; SSE-NEXT:    .cfi_def_cfa_offset 8
; SSE-NEXT:    retq
;
; AVX-LABEL: already_reassociated:
; AVX:       # %bb.0:
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    callq bar@PLT
; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX-NEXT:    callq bar@PLT
; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX-NEXT:    callq bar@PLT
; AVX-NEXT:    vmovsd %xmm0, (%rsp) # 8-byte Spill
; AVX-NEXT:    callq bar@PLT
; AVX-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; AVX-NEXT:    # xmm1 = mem[0],zero
; AVX-NEXT:    vaddsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 8-byte Folded Reload
; AVX-NEXT:    vaddsd (%rsp), %xmm0, %xmm0 # 8-byte Folded Reload
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd reassoc nsz double %x0, %x1
  %t1 = fadd reassoc nsz double %x2, %x3
  %t2 = fadd reassoc nsz double %t0, %t1
  ret double %t2
}
