1; RUN: llc  -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
2; RUN:        < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s
3
; Select between two <8 x i8> vectors on a scalar i8 equality test.
; The checks expect both scalar operands to be fmov'ed out of w0/w1, compared
; with cmeq, the lane-0 result broadcast with dup, and bsl to do the select.
define <8 x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_i8:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].8b, v[[LHS]].8b, v[[RHS]].8b
; CHECK: dup [[DUPMASK:v[0-9]+]].8b, [[MASK]].b[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %is.eq = icmp eq i8 %a, %b
  %picked = select i1 %is.eq, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %picked
}
15
; Select between two <8 x i8> vectors on a scalar float ordered-equal compare.
; The checks expect fcmeq on v0/v1, immediately followed by a dup of lane 0
; and a bsl over v2/v3.
define <8 x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].8b, v2.8b, v3.8b
  %is.oeq = fcmp oeq float %a, %b
  %picked = select i1 %is.oeq, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %picked
}
25
; Select between two <8 x i8> vectors on a scalar double ordered-equal compare.
; The checks expect a scalar fcmeq on d0/d1 whose result register is used
; directly as the bsl mask on the very next line (no dup in between).
define <8 x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_f64:
; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
; CHECK-NEXT: bsl v[[MASK]].8b, v2.8b, v3.8b
  %is.oeq = fcmp oeq double %a, %b
  %picked = select i1 %is.oeq, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %picked
}
34
; Select between two <16 x i8> vectors on a scalar i8 equality test.
; Expected sequence: fmov of w0/w1, cmeq on .16b, dup of byte lane 0, then bsl
; over v0/v1.
define <16 x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_i8:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].16b, v[[LHS]].16b, v[[RHS]].16b
; CHECK: dup [[DUPMASK:v[0-9]+]].16b, [[MASK]].b[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %is.eq = icmp eq i8 %a, %b
  %picked = select i1 %is.eq, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %picked
}
46
; Select between two <16 x i8> vectors on a scalar float ordered-equal compare.
; Expected sequence, on consecutive lines: fcmeq on .4s, dup of lane 0, bsl
; over v2/v3.
define <16 x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %is.oeq = fcmp oeq float %a, %b
  %picked = select i1 %is.oeq, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %picked
}
56
; Select between two <16 x i8> vectors on a scalar double ordered-equal
; compare. Expected sequence, on consecutive lines: fcmeq on .2d, dup of lane
; 0, bsl over v2/v3.
define <16 x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_f64:
; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %is.oeq = fcmp oeq double %a, %b
  %picked = select i1 %is.oeq, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %picked
}
66
; Select between two <4 x i16> vectors on a scalar i16 equality test.
; Expected sequence: fmov of w0/w1, cmeq on .4h, dup of halfword lane 0, then
; bsl over v0/v1.
define <4 x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: test_select_cc_v4i16:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4h, v[[LHS]].4h, v[[RHS]].4h
; CHECK: dup [[DUPMASK:v[0-9]+]].4h, [[MASK]].h[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %is.eq = icmp eq i16 %a, %b
  %picked = select i1 %is.eq, <4 x i16> %c, <4 x i16> %d
  ret <4 x i16> %picked
}
78
; Select between two <8 x i16> vectors on a scalar i16 equality test.
; Expected sequence: fmov of w0/w1, cmeq on .8h, dup of halfword lane 0, then
; bsl over v0/v1.
define <8 x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8 x i16> %c, <8 x i16> %d) {
; CHECK-LABEL: test_select_cc_v8i16:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].8h, v[[LHS]].8h, v[[RHS]].8h
; CHECK: dup [[DUPMASK:v[0-9]+]].8h, [[MASK]].h[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %is.eq = icmp eq i16 %a, %b
  %picked = select i1 %is.eq, <8 x i16> %c, <8 x i16> %d
  ret <8 x i16> %picked
}
90
; Select between two <2 x i32> vectors on a scalar i32 equality test.
; Expected sequence: fmov of w0/w1, cmeq on .2s, dup of word lane 0, then bsl
; over v0/v1.
define <2 x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: test_select_cc_v2i32:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].2s, v[[LHS]].2s, v[[RHS]].2s
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %is.eq = icmp eq i32 %a, %b
  %picked = select i1 %is.eq, <2 x i32> %c, <2 x i32> %d
  ret <2 x i32> %picked
}
102
; Select between two <4 x i32> vectors on a scalar i32 equality test.
; Expected sequence: fmov of w0/w1, cmeq on .4s, dup of word lane 0, then bsl
; over v0/v1.
define <4 x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: test_select_cc_v4i32:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %is.eq = icmp eq i32 %a, %b
  %picked = select i1 %is.eq, <4 x i32> %c, <4 x i32> %d
  ret <4 x i32> %picked
}
114
; Select between two <1 x i64> vectors on a scalar i64 equality test.
; Expected sequence: fmov of x0/x1 into d registers, a scalar cmeq, and bsl
; using the compare result register directly as its mask (no dup).
define <1 x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1 x i64> %c, <1 x i64> %d) {
; CHECK-LABEL: test_select_cc_v1i64:
; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
; CHECK: cmeq d[[MASK:[0-9]+]], d[[LHS]], d[[RHS]]
; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
  %is.eq = icmp eq i64 %a, %b
  %picked = select i1 %is.eq, <1 x i64> %c, <1 x i64> %d
  ret <1 x i64> %picked
}
125
; Select between two <2 x i64> vectors on a scalar i64 equality test.
; Expected sequence: fmov of x0/x1 into d registers, cmeq on .2d, dup of
; doubleword lane 0, then bsl over v0/v1.
define <2 x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2 x i64> %c, <2 x i64> %d) {
; CHECK-LABEL: test_select_cc_v2i64:
; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
; CHECK: cmeq [[MASK:v[0-9]+]].2d, v[[LHS]].2d, v[[RHS]].2d
; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %is.eq = icmp eq i64 %a, %b
  %picked = select i1 %is.eq, <2 x i64> %c, <2 x i64> %d
  ret <2 x i64> %picked
}
137
; Select between two <1 x float> vectors on a scalar float ordered-equal
; compare. The checks expect fcmeq on .2s with its result used as the bsl mask
; on the very next line (no dup in between).
define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d) {
; CHECK-LABEL: test_select_cc_v1f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
  %is.oeq = fcmp oeq float %a, %b
  %picked = select i1 %is.oeq, <1 x float> %c, <1 x float> %d
  ret <1 x float> %picked
}
146
; Select between two <2 x float> vectors on a scalar float ordered-equal
; compare. Expected sequence: fcmeq on .2s, dup of lane 0, bsl over v2/v3.
define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d) {
; CHECK-LABEL: test_select_cc_v2f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b
  %is.oeq = fcmp oeq float %a, %b
  %picked = select i1 %is.oeq, <2 x float> %c, <2 x float> %d
  ret <2 x float> %picked
}
156
; Select between two <4 x float> vectors on a scalar float ordered-equal
; compare. Expected sequence: fcmeq on .4s, dup of lane 0, bsl over v2/v3.
define <4 x float> @test_select_cc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test_select_cc_v4f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %is.oeq = fcmp oeq float %a, %b
  %picked = select i1 %is.oeq, <4 x float> %c, <4 x float> %d
  ret <4 x float> %picked
}
166
; Select between two <4 x float> vectors, but on an integer (i32) equality
; test. Expected sequence: fmov of w0/w1, cmeq on .4s, dup of lane 0, then bsl
; over v0/v1.
define <4 x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test_select_cc_v4f32_icmp:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %is.eq = icmp eq i32 %a, %b
  %picked = select i1 %is.eq, <4 x float> %c, <4 x float> %d
  ret <4 x float> %picked
}
178
; Select between two <1 x double> vectors on a scalar double ordered-equal
; compare. The checks expect a scalar fcmeq on d0/d1 whose result register is
; used directly as the bsl mask (no dup).
define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d) {
; CHECK-LABEL: test_select_cc_v1f64:
; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
; CHECK: bsl v[[MASK]].8b, v2.8b, v3.8b
  %is.oeq = fcmp oeq double %a, %b
  %picked = select i1 %is.oeq, <1 x double> %c, <1 x double> %d
  ret <1 x double> %picked
}
187
; Select between two <1 x double> vectors, but on an integer (i64) equality
; test. Expected sequence: fmov of x0/x1 into d registers, a scalar cmeq, and
; bsl using the compare result register directly as its mask (no dup).
define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d) {
; CHECK-LABEL: test_select_cc_v1f64_icmp:
; CHECK-DAG: fmov [[LHS:d[0-9]+]], x0
; CHECK-DAG: fmov [[RHS:d[0-9]+]], x1
; CHECK: cmeq d[[MASK:[0-9]+]], [[LHS]], [[RHS]]
; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
  %is.eq = icmp eq i64 %a, %b
  %picked = select i1 %is.eq, <1 x double> %c, <1 x double> %d
  ret <1 x double> %picked
}
198
; Select between two <2 x double> vectors on a scalar double ordered-equal
; compare. Expected sequence: fcmeq on .2d, dup of lane 0, bsl over v2/v3.
define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d) {
; CHECK-LABEL: test_select_cc_v2f64:
; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %is.oeq = fcmp oeq double %a, %b
  %picked = select i1 %is.oeq, <2 x double> %c, <2 x double> %d
  ret <2 x double> %picked
}
208
209; Special case: when the select condition is an icmp with i1 operands, don't
210; do the comparison on vectors.
211; Part of PR21549.
define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) {
; The i1 condition is expected to be evaluated in a general-purpose register
; (tst + csetm) and only then broadcast with dup to form the bsl mask; the
; final mov copies the selected value into v0.
; CHECK-LABEL: test_select_cc_v2i32_icmpi1:
; CHECK: tst   w0, #0x1
; CHECK: csetm [[MASK:w[0-9]+]], ne
; CHECK: dup   [[DUPMASK:v[0-9]+]].2s, [[MASK]]
; CHECK: bsl   [[DUPMASK]].8b, v0.8b, v1.8b
; CHECK: mov   v0.16b, [[DUPMASK]].16b
  %is.set = icmp ne i1 %cc, 0
  %picked = select i1 %is.set, <2 x i32> %a, <2 x i32> %b
  ret <2 x i32> %picked
}
223
224; Also make sure we support irregular/non-power-of-2 types such as v3f32.
define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
; Select between two <3 x float> vectors on a scalar float ordered-equal
; compare. The whole body is pinned line by line: fcmeq on v2/v3, dup of lane
; 0, bsl over v0/v1, a mov of the result into v0, then ret.
; bsl overwrites its mask operand, so its destination must be the register the
; dup produced; the check therefore reuses DUPMASK instead of capturing a new
; register, which would accept any destination.
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
  %cc = fcmp oeq float %c1, %c2
  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
  ret <3 x float> %r
}
236
define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
; Select between two <3 x float> vectors on a scalar double ordered-equal
; compare. The whole body is pinned line by line: fcmeq on .2d over v2/v3, dup
; of doubleword lane 0, bsl over v0/v1, a mov of the result into v0, then ret.
; bsl overwrites its mask operand, so its destination must be the register the
; dup produced; the check therefore reuses DUPMASK instead of capturing a new
; register, which would accept any destination.
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
  %cc = fcmp oeq double %c1, %c2
  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
  ret <3 x float> %r
}
248
; Attribute group #0: marks the functions that reference it as nounwind.
; NOTE(review): presumably this keeps unwind directives out of the output so
; those functions can pin the instruction directly after their label -- confirm.
attributes #0 = { nounwind }
250