; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx  | FileCheck %s --check-prefix=AVX
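
; Check that an FP vector binop is reduced to a single scalar instruction
; (addss, subsd, divss, etc.) when only element 0 of the result is
; meaningful - either the other lanes are undef, or every lane holds the
; same value. Where a splat is involved, it is expected to be sunk below
; the scalar op.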

define <4 x float> @fadd_op1_constant_v4f32(float %x) nounwind {
; SSE-LABEL: fadd_op1_constant_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    addss {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fadd_op1_constant_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fadd <4 x float> %v, <float 42.0, float undef, float undef, float undef>
  ret <4 x float> %b
}

define <4 x float> @load_fadd_op1_constant_v4f32(float* %p) nounwind {
; SSE-LABEL: load_fadd_op1_constant_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    addss {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_fadd_op1_constant_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fadd <4 x float> %v, <float 42.0, float undef, float undef, float undef>
  ret <4 x float> %b
}

define <4 x float> @fsub_op0_constant_v4f32(float %x) nounwind {
; SSE-LABEL: fsub_op0_constant_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    subss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsub_op0_constant_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fsub <4 x float> <float 42.0, float undef, float undef, float undef>, %v
  ret <4 x float> %b
}

define <4 x float> @load_fsub_op0_constant_v4f32(float* %p) nounwind {
; SSE-LABEL: load_fsub_op0_constant_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_fsub_op0_constant_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fsub <4 x float> <float 42.0, float undef, float undef, float undef>, %v
  ret <4 x float> %b
}

define <4 x float> @fmul_op1_constant_v4f32(float %x) nounwind {
; SSE-LABEL: fmul_op1_constant_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fmul_op1_constant_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fmul <4 x float> %v, <float 42.0, float undef, float undef, float undef>
  ret <4 x float> %b
}

define <4 x float> @load_fmul_op1_constant_v4f32(float* %p) nounwind {
; SSE-LABEL: load_fmul_op1_constant_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    mulss {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_fmul_op1_constant_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fmul <4 x float> %v, <float 42.0, float undef, float undef, float undef>
  ret <4 x float> %b
}

define <4 x float> @fdiv_op1_constant_v4f32(float %x) nounwind {
; SSE-LABEL: fdiv_op1_constant_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    divss {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_op1_constant_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fdiv <4 x float> %v, <float 42.0, float undef, float undef, float undef>
  ret <4 x float> %b
}

define <4 x float> @load_fdiv_op1_constant_v4f32(float* %p) nounwind {
; SSE-LABEL: load_fdiv_op1_constant_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    divss {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_fdiv_op1_constant_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fdiv <4 x float> %v, <float 42.0, float undef, float undef, float undef>
  ret <4 x float> %b
}

define <4 x float> @fdiv_op0_constant_v4f32(float %x) nounwind {
; SSE-LABEL: fdiv_op0_constant_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_op0_constant_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fdiv <4 x float> <float 42.0, float undef, float undef, float undef>, %v
  ret <4 x float> %b
}

define <4 x float> @load_fdiv_op0_constant_v4f32(float* %p) nounwind {
; SSE-LABEL: load_fdiv_op0_constant_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_fdiv_op0_constant_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fdiv <4 x float> <float 42.0, float undef, float undef, float undef>, %v
  ret <4 x float> %b
}

define <4 x double> @fadd_op1_constant_v4f64(double %x) nounwind {
; SSE-LABEL: fadd_op1_constant_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fadd_op1_constant_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <4 x double> undef, double %x, i32 0
  %b = fadd <4 x double> %v, <double 42.0, double undef, double undef, double undef>
  ret <4 x double> %b
}

define <4 x double> @load_fadd_op1_constant_v4f64(double* %p) nounwind {
; SSE-LABEL: load_fadd_op1_constant_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    addsd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_fadd_op1_constant_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load double, double* %p
  %v = insertelement <4 x double> undef, double %x, i32 0
  %b = fadd <4 x double> %v, <double 42.0, double undef, double undef, double undef>
  ret <4 x double> %b
}

define <4 x double> @fsub_op0_constant_v4f64(double %x) nounwind {
; SSE-LABEL: fsub_op0_constant_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsub_op0_constant_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <4 x double> undef, double %x, i32 0
  %b = fsub <4 x double> <double 42.0, double undef, double undef, double undef>, %v
  ret <4 x double> %b
}

define <4 x double> @load_fsub_op0_constant_v4f64(double* %p) nounwind {
; SSE-LABEL: load_fsub_op0_constant_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    subsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_fsub_op0_constant_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load double, double* %p
  %v = insertelement <4 x double> undef, double %x, i32 0
  %b = fsub <4 x double> <double 42.0, double undef, double undef, double undef>, %v
  ret <4 x double> %b
}

define <4 x double> @fmul_op1_constant_v4f64(double %x) nounwind {
; SSE-LABEL: fmul_op1_constant_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fmul_op1_constant_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <4 x double> undef, double %x, i32 0
  %b = fmul <4 x double> %v, <double 42.0, double undef, double undef, double undef>
  ret <4 x double> %b
}

define <4 x double> @load_fmul_op1_constant_v4f64(double* %p) nounwind {
; SSE-LABEL: load_fmul_op1_constant_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_fmul_op1_constant_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load double, double* %p
  %v = insertelement <4 x double> undef, double %x, i32 0
  %b = fmul <4 x double> %v, <double 42.0, double undef, double undef, double undef>
  ret <4 x double> %b
}

define <4 x double> @fdiv_op1_constant_v4f64(double %x) nounwind {
; SSE-LABEL: fdiv_op1_constant_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    divsd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_op1_constant_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <4 x double> undef, double %x, i32 0
  %b = fdiv <4 x double> %v, <double 42.0, double undef, double undef, double undef>
  ret <4 x double> %b
}

define <4 x double> @load_fdiv_op1_constant_v4f64(double* %p) nounwind {
; SSE-LABEL: load_fdiv_op1_constant_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    divsd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_fdiv_op1_constant_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vdivsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load double, double* %p
  %v = insertelement <4 x double> undef, double %x, i32 0
  %b = fdiv <4 x double> %v, <double 42.0, double undef, double undef, double undef>
  ret <4 x double> %b
}

define <4 x double> @fdiv_op0_constant_v4f64(double %x) nounwind {
; SSE-LABEL: fdiv_op0_constant_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    divsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_op0_constant_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %v = insertelement <4 x double> undef, double %x, i32 0
  %b = fdiv <4 x double> <double 42.0, double undef, double undef, double undef>, %v
  ret <4 x double> %b
}

define <4 x double> @load_fdiv_op0_constant_v4f64(double* %p) nounwind {
; SSE-LABEL: load_fdiv_op0_constant_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    divsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_fdiv_op0_constant_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load double, double* %p
  %v = insertelement <4 x double> undef, double %x, i32 0
  %b = fdiv <4 x double> <double 42.0, double undef, double undef, double undef>, %v
  ret <4 x double> %b
}

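; If both operands are splats of lane 0, the binop can be performed on the
; scalar lanes and the splat sunk after it, as the following tests show.
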
define <2 x double> @fadd_splat_splat_v2f64(<2 x double> %vx, <2 x double> %vy) {
; SSE-LABEL: fadd_splat_splat_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fadd_splat_splat_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> zeroinitializer
  %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> zeroinitializer
  %r = fadd <2 x double> %splatx, %splaty
  ret <2 x double> %r
}

define <4 x double> @fsub_splat_splat_v4f64(double %x, double %y) {
; SSE-LABEL: fsub_splat_splat_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fsub_splat_splat_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vx = insertelement <4 x double> undef, double %x, i32 0
  %vy = insertelement <4 x double> undef, double %y, i32 0
  %splatx = shufflevector <4 x double> %vx, <4 x double> undef, <4 x i32> zeroinitializer
  %splaty = shufflevector <4 x double> %vy, <4 x double> undef, <4 x i32> zeroinitializer
  %r = fsub <4 x double> %splatx, %splaty
  ret <4 x double> %r
}

define <4 x float> @fmul_splat_splat_v4f32(<4 x float> %vx, <4 x float> %vy) {
; SSE-LABEL: fmul_splat_splat_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fmul_splat_splat_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    retq
  %splatx = shufflevector <4 x float> %vx, <4 x float> undef, <4 x i32> zeroinitializer
  %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer
  %r = fmul fast <4 x float> %splatx, %splaty
  ret <4 x float> %r
}

define <8 x float> @fdiv_splat_splat_v8f32(<8 x float> %vx, <8 x float> %vy) {
; SSE-LABEL: fdiv_splat_splat_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm2, %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_splat_splat_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %splatx = shufflevector <8 x float> %vx, <8 x float> undef, <8 x i32> zeroinitializer
  %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer
  %r = fdiv fast <8 x float> %splatx, %splaty
  ret <8 x float> %r
}

; Negative test - splat of a non-zero index (the binop stays packed, but the splat is still sunk after it).

define <2 x double> @fadd_splat_splat_nonzero_v2f64(<2 x double> %vx, <2 x double> %vy) {
; SSE-LABEL: fadd_splat_splat_nonzero_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: fadd_splat_splat_nonzero_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT:    retq
  %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %r = fadd <2 x double> %splatx, %splaty
  ret <2 x double> %r
}

; Negative test - the two splats use mismatched indexes.

define <2 x double> @fadd_splat_splat_mismatch_v2f64(<2 x double> %vx, <2 x double> %vy) {
; SSE-LABEL: fadd_splat_splat_mismatch_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fadd_splat_splat_mismatch_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,1]
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %r = fadd <2 x double> %splatx, %splaty
  ret <2 x double> %r
}

; Negative test - one operand shuffle is not a splat.

define <2 x double> @fadd_splat_nonsplat_v2f64(<2 x double> %vx, <2 x double> %vy) {
; SSE-LABEL: fadd_splat_nonsplat_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fadd_splat_nonsplat_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> <i32 0, i32 1>
  %r = fadd <2 x double> %splatx, %splaty
  ret <2 x double> %r
}

; Negative test - non-FP.

define <2 x i64> @add_splat_splat_v2i64(<2 x i64> %vx, <2 x i64> %vy) {
; SSE-LABEL: add_splat_splat_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    paddq %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: add_splat_splat_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT:    retq
  %splatx = shufflevector <2 x i64> %vx, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %splaty = shufflevector <2 x i64> %vy, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %r = add <2 x i64> %splatx, %splaty
  ret <2 x i64> %r
}

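; A constant operand does not block the transform: the scalar op combines
; the splatted variable lane with a single constant element.
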
define <2 x double> @fadd_splat_const_op1_v2f64(<2 x double> %vx) {
; SSE-LABEL: fadd_splat_const_op1_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd {{.*}}(%rip), %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fadd_splat_const_op1_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> zeroinitializer
  %r = fadd <2 x double> %splatx, <double 42.0, double 42.0>
  ret <2 x double> %r
}

define <4 x double> @fsub_const_op0_splat_v4f64(double %x) {
; SSE-LABEL: fsub_const_op0_splat_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsub_const_op0_splat_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vx = insertelement <4 x double> undef, double 8.0, i32 0
  %vy = insertelement <4 x double> undef, double %x, i32 0
  %splatx = shufflevector <4 x double> %vx, <4 x double> undef, <4 x i32> zeroinitializer
  %splaty = shufflevector <4 x double> %vy, <4 x double> undef, <4 x i32> zeroinitializer
  %r = fsub <4 x double> %splatx, %splaty
  ret <4 x double> %r
}

define <4 x float> @fmul_splat_const_op1_v4f32(<4 x float> %vx, <4 x float> %vy) {
; SSE-LABEL: fmul_splat_const_op1_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss {{.*}}(%rip), %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fmul_splat_const_op1_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    retq
  %splatx = shufflevector <4 x float> %vx, <4 x float> undef, <4 x i32> zeroinitializer
  %r = fmul fast <4 x float> %splatx, <float 17.0, float 17.0, float 17.0, float 17.0>
  ret <4 x float> %r
}

define <8 x float> @fdiv_splat_const_op0_v8f32(<8 x float> %vy) {
; SSE-LABEL: fdiv_splat_const_op0_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_splat_const_op0_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %splatx = shufflevector <8 x float> <float 4.5, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, <8 x float> undef, <8 x i32> zeroinitializer
  %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer
  %r = fdiv fast <8 x float> %splatx, %splaty
  ret <8 x float> %r
}

define <8 x float> @fdiv_const_op1_splat_v8f32(<8 x float> %vx) {
; SSE-LABEL: fdiv_const_op1_splat_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_const_op1_splat_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %splatx = shufflevector <8 x float> %vx, <8 x float> undef, <8 x i32> zeroinitializer
  %splaty = shufflevector <8 x float> <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, <8 x float> undef, <8 x i32> zeroinitializer
  %r = fdiv fast <8 x float> %splatx, %splaty
  ret <8 x float> %r
}

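; Splatting lane 0 of a binop result only demands lane 0 of the operands, so
; these cases also reduce to a scalar op followed by a splat.
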
define <2 x double> @splat0_fadd_v2f64(<2 x double> %vx, <2 x double> %vy) {
; SSE-LABEL: splat0_fadd_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fadd_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %b = fadd <2 x double> %vx, %vy
  %r = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %r
}

define <4 x double> @splat0_fsub_v4f64(double %x, double %y) {
; SSE-LABEL: splat0_fsub_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    subsd %xmm1, %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fsub_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vx = insertelement <4 x double> undef, double %x, i32 0
  %vy = insertelement <4 x double> undef, double %y, i32 0
  %b = fsub <4 x double> %vx, %vy
  %r = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %r
}

define <4 x float> @splat0_fmul_v4f32(<4 x float> %vx, <4 x float> %vy) {
; SSE-LABEL: splat0_fmul_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss %xmm1, %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fmul_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    retq
  %b = fmul fast <4 x float> %vx, %vy
  %r = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %r
}

define <8 x float> @splat0_fdiv_v8f32(<8 x float> %vx, <8 x float> %vy) {
; SSE-LABEL: splat0_fdiv_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    divss %xmm2, %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fdiv_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %b = fdiv fast <8 x float> %vx, %vy
  %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %r
}

define <2 x double> @splat0_fadd_const_op1_v2f64(<2 x double> %vx) {
; SSE-LABEL: splat0_fadd_const_op1_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    addsd {{.*}}(%rip), %xmm0
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fadd_const_op1_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
  %b = fadd <2 x double> %vx, <double 42.0, double 12.0>
  %r = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %r
}

define <4 x double> @splat0_fsub_const_op0_v4f64(double %x) {
; SSE-LABEL: splat0_fsub_const_op0_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    subsd %xmm0, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fsub_const_op0_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vx = insertelement <4 x double> undef, double %x, i32 0
  %b = fsub <4 x double> <double -42.0, double 42.0, double 0.0, double 1.0>, %vx
  %r = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %r
}

define <4 x float> @splat0_fmul_const_op1_v4f32(<4 x float> %vx) {
; SSE-LABEL: splat0_fmul_const_op1_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    mulss {{.*}}(%rip), %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fmul_const_op1_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    retq
  %b = fmul fast <4 x float> %vx, <float 6.0, float -1.0, float 1.0, float 7.0>
  %r = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %r
}

define <8 x float> @splat0_fdiv_const_op1_v8f32(<8 x float> %vx) {
; SSE-LABEL: splat0_fdiv_const_op1_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fdiv_const_op1_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %b = fdiv fast <8 x float> %vx, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
  %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %r
}

define <8 x float> @splat0_fdiv_const_op0_v8f32(<8 x float> %vx) {
; SSE-LABEL: splat0_fdiv_const_op0_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    divss %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splat0_fdiv_const_op0_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %b = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %vx
  %r = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %r
}

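; The fmul result has two uses below (two different lanes are extracted), so
; the multiply is expected to remain a full vector op.
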
define <4 x float> @multi_use_binop(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: multi_use_binop:
; SSE:       # %bb.0:
; SSE-NEXT:    mulps %xmm1, %xmm0
; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: multi_use_binop:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %mul = fmul <4 x float> %x, %y
  %mul0 = shufflevector <4 x float> %mul, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 0>
  %mul1 = shufflevector <4 x float> %mul, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 1>
  %r = fadd <4 x float> %mul0, %mul1
  ret <4 x float> %r
}