1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
3
4; Each pair of tests should be logically equivalent,
5; so codegen should be the same and optimal for each pair.
6
; Splat-before form: broadcast lane 0 of %a to all four lanes, then multiply
; by a constant vector holding 3.0 in every lane.
; Expected assembly: the broadcast is folded into a by-element fmul
; (v0.s[0]), so no separate dup instruction appears.
7define <4 x float> @splat0_before_fmul_constant(<4 x float> %a) {
8; CHECK-LABEL: splat0_before_fmul_constant:
9; CHECK:       // %bb.0:
10; CHECK-NEXT:    fmov v1.4s, #3.00000000
11; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.s[0]
12; CHECK-NEXT:    ret
13  %splat = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
14  %mul = fmul <4 x float> %splat, <float 3.0, float 3.0, float 3.0, float 3.0>
15  ret <4 x float> %mul
16}
17
; Splat-after form: multiply %a by a constant vector first, then broadcast
; lane 0 of the product. The constant is not uniform (lane 1 is 42.0), but
; the shuffle mask reads only lane 0 of the product, where the constant is 3.0.
; Expected assembly: a full-vector fmul by a 3.0 splat, followed by a
; separate dup of lane 0.
18define <4 x float> @splat0_after_fmul_constant(<4 x float> %a) {
19; CHECK-LABEL: splat0_after_fmul_constant:
20; CHECK:       // %bb.0:
21; CHECK-NEXT:    fmov v1.4s, #3.00000000
22; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
23; CHECK-NEXT:    dup v0.4s, v0.s[0]
24; CHECK-NEXT:    ret
25  %mul = fmul <4 x float> %a, <float 3.0, float 42.0, float 3.0, float 3.0>
26  %splat = shufflevector <4 x float> %mul, <4 x float> undef, <4 x i32> zeroinitializer
27  ret <4 x float> %splat
28}
29
30; Try different type and splat lane.
31
; Splat-before form on <2 x double>: broadcast lane 1 of %a to both lanes,
; then multiply by a constant vector holding 5.0 in both lanes.
; Expected assembly: a single by-element fmul using v0.d[1]; no dup.
32define <2 x double> @splat1_before_fmul_constant(<2 x double> %a) {
33; CHECK-LABEL: splat1_before_fmul_constant:
34; CHECK:       // %bb.0:
35; CHECK-NEXT:    fmov v1.2d, #5.00000000
36; CHECK-NEXT:    fmul v0.2d, v1.2d, v0.d[1]
37; CHECK-NEXT:    ret
38  %splat = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 1>
39  %mul = fmul <2 x double> %splat, <double 5.0, double 5.0>
40  ret <2 x double> %mul
41}
42
; Splat-after form on <2 x double>: multiply %a by <-1.0, 5.0>, then
; broadcast lane 1 of the product. Only lane 1 of the product is read by the
; shuffle mask, and the constant in that lane is 5.0.
; Expected assembly: a full-vector fmul by a 5.0 splat, followed by a
; separate dup of lane 1.
43define <2 x double> @splat1_after_fmul_constant(<2 x double> %a) {
44; CHECK-LABEL: splat1_after_fmul_constant:
45; CHECK:       // %bb.0:
46; CHECK-NEXT:    fmov v1.2d, #5.00000000
47; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
48; CHECK-NEXT:    dup v0.2d, v0.d[1]
49; CHECK-NEXT:    ret
50  %mul = fmul <2 x double> %a, <double -1.0, double 5.0>
51  %splat = shufflevector <2 x double> %mul, <2 x double> undef, <2 x i32> <i32 1, i32 1>
52  ret <2 x double> %splat
53}
54
55; 2 variable operands
56
; Splat-before form with two variable operands: lane 1 of %a and lane 1 of %b
; are each broadcast to both lanes, and the two broadcasts are multiplied.
; Expected assembly: one full-vector fmul of the unsplatted inputs followed
; by a single dup of lane 1 (the two input broadcasts do not appear).
57define <2 x double> @splat1_before_fmul(<2 x double> %a, <2 x double> %b) {
58; CHECK-LABEL: splat1_before_fmul:
59; CHECK:       // %bb.0:
60; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
61; CHECK-NEXT:    dup v0.2d, v0.d[1]
62; CHECK-NEXT:    ret
63  %splata = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 1>
64  %splatb = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> <i32 1, i32 1>
65  %mul = fmul <2 x double> %splata, %splatb
66  ret <2 x double> %mul
67}
68
; Splat-after form with two variable operands: multiply %a by %b, then
; broadcast lane 1 of the product to both lanes.
; Expected assembly: a full-vector fmul followed by a dup of lane 1, the
; same instruction sequence expected for splat1_before_fmul.
69define <2 x double> @splat1_after_fmul(<2 x double> %a, <2 x double> %b) {
70; CHECK-LABEL: splat1_after_fmul:
71; CHECK:       // %bb.0:
72; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
73; CHECK-NEXT:    dup v0.2d, v0.d[1]
74; CHECK-NEXT:    ret
75  %mul = fmul <2 x double> %a, %b
76  %splat = shufflevector <2 x double> %mul, <2 x double> undef, <2 x i32> <i32 1, i32 1>
77  ret <2 x double> %splat
78}
79
80; Integer multiply
81
; Integer splat-before form: shuffle %a with the mask <2, undef, 2, undef>
; (lanes 0 and 2 take lane 2 of %a; lanes 1 and 3 are left undef), then
; multiply by a constant vector holding 3 in every lane.
; Expected assembly: the shuffle is treated as a lane-2 splat and folded into
; a by-element mul (v0.s[2]); no dup.
82define <4 x i32> @splat2_before_mul_constant(<4 x i32> %a) {
83; CHECK-LABEL: splat2_before_mul_constant:
84; CHECK:       // %bb.0:
85; CHECK-NEXT:    movi v1.4s, #3
86; CHECK-NEXT:    mul v0.4s, v1.4s, v0.s[2]
87; CHECK-NEXT:    ret
88  %splat = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
89  %mul = mul <4 x i32> %splat, <i32 3, i32 3, i32 3, i32 3>
90  ret <4 x i32> %mul
91}
92
; Integer splat-after form: multiply %a by a constant vector holding 3 in
; every lane, then shuffle the product with the mask <2, undef, 2, undef>
; (lanes 0 and 2 take lane 2 of the product; lanes 1 and 3 are left undef).
; Expected assembly: a full-vector mul followed by a separate dup of lane 2.
93define <4 x i32> @splat2_after_mul_constant(<4 x i32> %a) {
94; CHECK-LABEL: splat2_after_mul_constant:
95; CHECK:       // %bb.0:
96; CHECK-NEXT:    movi v1.4s, #3
97; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
98; CHECK-NEXT:    dup v0.4s, v0.s[2]
99; CHECK-NEXT:    ret
100  %mul = mul <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
101  %splat = shufflevector <4 x i32> %mul, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
102  ret <4 x i32> %splat
103}
104
105; Different type, lane, and 2 variable operands.
106
; Integer splat-before form on <8 x i16> with two variable operands: lane 1
; of %a and lane 1 of %b are each broadcast to all eight lanes, and the two
; broadcasts are multiplied.
; Expected assembly: one full-vector mul of the unsplatted inputs followed by
; a single dup of lane 1.
107define <8 x i16> @splat1_before_mul(<8 x i16> %a, <8 x i16> %b) {
108; CHECK-LABEL: splat1_before_mul:
109; CHECK:       // %bb.0:
110; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
111; CHECK-NEXT:    dup v0.8h, v0.h[1]
112; CHECK-NEXT:    ret
113  %splata = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
114  %splatb = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
115  %mul = mul <8 x i16> %splata, %splatb
116  ret <8 x i16> %mul
117}
118
; Integer splat-after form on <8 x i16> with two variable operands: multiply
; %a by %b, then broadcast lane 1 of the product to all eight lanes.
; Expected assembly: a full-vector mul followed by a dup of lane 1, the same
; instruction sequence expected for splat1_before_mul.
119define <8 x i16> @splat1_after_mul(<8 x i16> %a, <8 x i16> %b) {
120; CHECK-LABEL: splat1_after_mul:
121; CHECK:       // %bb.0:
122; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
123; CHECK-NEXT:    dup v0.8h, v0.h[1]
124; CHECK-NEXT:    ret
125  %mul = mul <8 x i16> %a, %b
126  %splat = shufflevector <8 x i16> %mul, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
127  ret <8 x i16> %splat
128}
129
130; Multiple multiplies.
131
; Chained splat-before form: broadcast lane 0 of %a, multiply by a 3.0 splat,
; broadcast lane 0 of that product, then multiply by a 6.0 splat.
; Expected assembly: the first multiply is a full-vector fmul by 3.0 with no
; dup, and the second is a by-element fmul by 6.0 that reads lane 0 of the
; first product (v0.s[0]).
132define <4 x float> @splat0_before_fmul_fmul_constant(<4 x float> %a) {
133; CHECK-LABEL: splat0_before_fmul_fmul_constant:
134; CHECK:       // %bb.0:
135; CHECK-NEXT:    fmov v1.4s, #3.00000000
136; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
137; CHECK-NEXT:    fmov v1.4s, #6.00000000
138; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.s[0]
139; CHECK-NEXT:    ret
140  %splat1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
141  %mul1 = fmul <4 x float> %splat1, <float 3.0, float 3.0, float 3.0, float 3.0>
142  %splat2 = shufflevector <4 x float> %mul1, <4 x float> undef, <4 x i32> zeroinitializer
143  %mul2 = fmul <4 x float> %splat2, <float 6.0, float 6.0, float 6.0, float 6.0>
144  ret <4 x float> %mul2
145}
146
; Chained splat-after form: multiply %a by two constant vectors in sequence,
; then broadcast lane 0 of the final product. Neither constant is uniform
; (lane 0 holds 3.0 and 6.0 respectively; the other lanes differ), but the
; shuffle mask reads only lane 0 of the final product.
; Expected assembly: two full-vector fmuls, by a 3.0 splat and then a 6.0
; splat, followed by a separate dup of lane 0.
147define <4 x float> @splat0_after_fmul_fmul_constant(<4 x float> %a) {
148; CHECK-LABEL: splat0_after_fmul_fmul_constant:
149; CHECK:       // %bb.0:
150; CHECK-NEXT:    fmov v1.4s, #3.00000000
151; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
152; CHECK-NEXT:    fmov v1.4s, #6.00000000
153; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
154; CHECK-NEXT:    dup v0.4s, v0.s[0]
155; CHECK-NEXT:    ret
156  %mul1 = fmul <4 x float> %a, <float 3.0, float 42.0, float 3.0, float 3.0>
157  %mul2 = fmul <4 x float> %mul1, <float 6.0, float 42.0, float 3.0, float 3.0>
158  %splat = shufflevector <4 x float> %mul2, <4 x float> undef, <4 x i32> zeroinitializer
159  ret <4 x float> %splat
160}
161