1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,+fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
4
5; VFMADD
; Scalar FMA4 intrinsic a0*a1 + a2 must select the 4-operand VFMADDSS
; (FMA4 encoding, VEX opcode 0x6a), not a 3-operand FMA3 form, under both
; RUN configurations.
define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

; Same intrinsic with the two multiplicands swapped (b*a + c): the selected
; instruction commutes the sources, which shows up in the encoding as a
; different VEX.vvvv byte (0xf1 vs 0xf9) and is4 immediate (0x00 vs 0x10).
define <4 x float> @test_x86_fma4_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)

; Double-precision counterpart of the ss test above (VFMADDSD, opcode 0x6b).
define <2 x double> @test_x86_fma4_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

; Commuted-operand (b*a + c) double-precision variant.
define <2 x double> @test_x86_fma4_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma4_vfmadd_bac_sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
47
; Packed fused multiply-add via the generic llvm.fma intrinsic must lower to
; the FMA4 VFMADDPS/VFMADDPD forms (128- and 256-bit).
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %1
}

; 128-bit packed double version (VFMADDPD, opcode 0x69).
define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %1
}

; 256-bit (ymm) packed single version; VEX.L is set (0xfd vs 0xf9 in the
; encoding bytes).
define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %1
}

; 256-bit packed double version.
define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %1
}
87
88; VFMSUB
; A negated addend (fsub from -0.0, i.e. fneg of %a2) feeding llvm.fma must
; fold into the single VFMSUBPS/VFMSUBPD instruction rather than emitting a
; separate negation.
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
  ret <4 x float> %2
}

; 128-bit packed double vfmsub fold.
define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
  ret <2 x double> %2
}

; 256-bit packed single vfmsub fold.
define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
  ret <8 x float> %2
}

; 256-bit packed double vfmsub fold.
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
  ret <4 x double> %2
}
132
133; VFNMADD
; A negated first multiplicand (fneg of %a0 via fsub from -0.0) feeding
; llvm.fma must fold into VFNMADDPS/VFNMADDPD: -(a*b) + c.
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %2
}

; 128-bit packed double vfnmadd fold.
define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %2
}

; 256-bit packed single vfnmadd fold.
define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %2
}

; 256-bit packed double vfnmadd fold.
define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %2
}
177
178; VFNMSUB
; Negating both the first multiplicand and the addend (two fnegs via fsub
; from -0.0) must fold into a single VFNMSUBPS/VFNMSUBPD: -(a*b) - c.
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2)
  ret <4 x float> %3
}

; 128-bit packed double vfnmsub fold.
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2)
  ret <2 x double> %3
}

; 256-bit packed single vfnmsub fold.
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2)
  ret <8 x float> %3
}

; 256-bit packed double vfnmsub fold.
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2)
  ret <4 x double> %3
}
226
227; VFMADDSUB
; Interleaving two FMAs with a shufflevector — even lanes taken from the
; subtracting FMA (%3, first shuffle operand), odd lanes from the adding FMA
; (%1, second operand) — must fold into a single VFMADDSUBPS/PD.
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
  %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %4
}

; 128-bit packed double vfmaddsub pattern (mask <0,3>: lane 0 sub, lane 1 add).
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
  %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %4
}

; 256-bit packed single vfmaddsub pattern.
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
  %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %4
}

; 256-bit packed double vfmaddsub pattern.
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
  %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %4
}
279
280; VFMSUBADD
; Mirror of the vfmaddsub tests with the shuffle operands swapped — even
; lanes from the adding FMA (%1, first shuffle operand), odd lanes from the
; subtracting FMA (%3) — which must fold into VFMSUBADDPS/PD.
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2)
  %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %4
}

; 128-bit packed double vfmsubadd pattern.
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2)
  %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %4
}

; 256-bit packed single vfmsubadd pattern.
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2)
  %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %4
}

; 256-bit packed double vfmsubadd pattern.
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2)
  %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %4
}
332
333declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #2
334declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #2
335declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #2
336declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #2
337
338attributes #0 = { nounwind }
339