1; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
2
3define <8 x i8> @v_dup8(i8 %A) nounwind {
4;CHECK-LABEL: v_dup8:
5;CHECK: vdup.8
6	%tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
7	%tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
8	%tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
9	%tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
10	%tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
11	%tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
12	%tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
13	%tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
14	ret <8 x i8> %tmp8
15}
16
17define <4 x i16> @v_dup16(i16 %A) nounwind {
18;CHECK-LABEL: v_dup16:
19;CHECK: vdup.16
20	%tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
21	%tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
22	%tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
23	%tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
24	ret <4 x i16> %tmp4
25}
26
27define <2 x i32> @v_dup32(i32 %A) nounwind {
28;CHECK-LABEL: v_dup32:
29;CHECK: vdup.32
30	%tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
31	%tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
32	ret <2 x i32> %tmp2
33}
34
35define <2 x float> @v_dupfloat(float %A) nounwind {
36;CHECK-LABEL: v_dupfloat:
37;CHECK: vdup.32
38	%tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
39	%tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
40	ret <2 x float> %tmp2
41}
42
43define <16 x i8> @v_dupQ8(i8 %A) nounwind {
44;CHECK-LABEL: v_dupQ8:
45;CHECK: vdup.8
46	%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
47	%tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
48	%tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
49	%tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
50	%tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
51	%tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
52	%tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
53	%tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
54	%tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
55	%tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
56	%tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
57	%tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
58	%tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
59	%tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
60	%tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
61	%tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
62	ret <16 x i8> %tmp16
63}
64
65define <8 x i16> @v_dupQ16(i16 %A) nounwind {
66;CHECK-LABEL: v_dupQ16:
67;CHECK: vdup.16
68	%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
69	%tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
70	%tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
71	%tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
72	%tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
73	%tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
74	%tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
75	%tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
76	ret <8 x i16> %tmp8
77}
78
79define <4 x i32> @v_dupQ32(i32 %A) nounwind {
80;CHECK-LABEL: v_dupQ32:
81;CHECK: vdup.32
82	%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
83	%tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
84	%tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
85	%tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
86	ret <4 x i32> %tmp4
87}
88
89define <4 x float> @v_dupQfloat(float %A) nounwind {
90;CHECK-LABEL: v_dupQfloat:
91;CHECK: vdup.32
92	%tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
93	%tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
94	%tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
95	%tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
96	ret <4 x float> %tmp4
97}
98
99; Check to make sure it works with shuffles, too.
100
101define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
102;CHECK-LABEL: v_shuffledup8:
103;CHECK: vdup.8
104	%tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
105	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
106	ret <8 x i8> %tmp2
107}
108
109define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
110;CHECK-LABEL: v_shuffledup16:
111;CHECK: vdup.16
112	%tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
113	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
114	ret <4 x i16> %tmp2
115}
116
117define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
118;CHECK-LABEL: v_shuffledup32:
119;CHECK: vdup.32
120	%tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
121	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
122	ret <2 x i32> %tmp2
123}
124
125define <2 x float> @v_shuffledupfloat(float %A) nounwind {
126;CHECK-LABEL: v_shuffledupfloat:
127;CHECK: vdup.32
128	%tmp1 = insertelement <2 x float> undef, float %A, i32 0
129	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
130	ret <2 x float> %tmp2
131}
132
133define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
134;CHECK-LABEL: v_shuffledupQ8:
135;CHECK: vdup.8
136	%tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
137	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
138	ret <16 x i8> %tmp2
139}
140
141define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
142;CHECK-LABEL: v_shuffledupQ16:
143;CHECK: vdup.16
144	%tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
145	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
146	ret <8 x i16> %tmp2
147}
148
149define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
150;CHECK-LABEL: v_shuffledupQ32:
151;CHECK: vdup.32
152	%tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
153	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
154	ret <4 x i32> %tmp2
155}
156
157define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
158;CHECK-LABEL: v_shuffledupQfloat:
159;CHECK: vdup.32
160	%tmp1 = insertelement <4 x float> undef, float %A, i32 0
161	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
162	ret <4 x float> %tmp2
163}
164
165define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
166;CHECK-LABEL: vduplane8:
167;CHECK: vdup.8
168	%tmp1 = load <8 x i8>* %A
169	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
170	ret <8 x i8> %tmp2
171}
172
173define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
174;CHECK-LABEL: vduplane16:
175;CHECK: vdup.16
176	%tmp1 = load <4 x i16>* %A
177	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
178	ret <4 x i16> %tmp2
179}
180
181define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
182;CHECK-LABEL: vduplane32:
183;CHECK: vdup.32
184	%tmp1 = load <2 x i32>* %A
185	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
186	ret <2 x i32> %tmp2
187}
188
189define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
190;CHECK-LABEL: vduplanefloat:
191;CHECK: vdup.32
192	%tmp1 = load <2 x float>* %A
193	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
194	ret <2 x float> %tmp2
195}
196
197define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
198;CHECK-LABEL: vduplaneQ8:
199;CHECK: vdup.8
200	%tmp1 = load <8 x i8>* %A
201	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
202	ret <16 x i8> %tmp2
203}
204
205define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
206;CHECK-LABEL: vduplaneQ16:
207;CHECK: vdup.16
208	%tmp1 = load <4 x i16>* %A
209	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
210	ret <8 x i16> %tmp2
211}
212
213define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
214;CHECK-LABEL: vduplaneQ32:
215;CHECK: vdup.32
216	%tmp1 = load <2 x i32>* %A
217	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
218	ret <4 x i32> %tmp2
219}
220
221define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
222;CHECK-LABEL: vduplaneQfloat:
223;CHECK: vdup.32
224	%tmp1 = load <2 x float>* %A
225	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
226	ret <4 x float> %tmp2
227}
228
229define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
230entry:
231  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
232  ret <2 x i64> %0
233}
234
235define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
236entry:
237  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
238  ret <2 x i64> %0
239}
240
241define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
242entry:
243  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
244  ret <2 x double> %0
245}
246
247define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
248entry:
249  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
250  ret <2 x double> %0
251}
252
253; Radar 7373643
254;CHECK-LABEL: redundantVdup:
255;CHECK: vmov.i8
256;CHECK-NOT: vdup.8
257;CHECK: vstr
258define void @redundantVdup(<8 x i8>* %ptr) nounwind {
259  %1 = insertelement <8 x i8> undef, i8 -128, i32 0
260  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
261  store <8 x i8> %2, <8 x i8>* %ptr, align 8
262  ret void
263}
264
265define <4 x i32> @tdupi(i32 %x, i32 %y) {
266;CHECK-LABEL: tdupi:
267;CHECK: vdup.32
268  %1 = insertelement <4 x i32> undef, i32 %x, i32 0
269  %2 = insertelement <4 x i32> %1, i32 %x, i32 1
270  %3 = insertelement <4 x i32> %2, i32 %x, i32 2
271  %4 = insertelement <4 x i32> %3, i32 %y, i32 3
272  ret <4 x i32> %4
273}
274
275define <4 x float> @tdupf(float %x, float %y) {
276;CHECK-LABEL: tdupf:
277;CHECK: vdup.32
278  %1 = insertelement <4 x float> undef, float %x, i32 0
279  %2 = insertelement <4 x float> %1, float %x, i32 1
280  %3 = insertelement <4 x float> %2, float %x, i32 2
281  %4 = insertelement <4 x float> %3, float %y, i32 3
282  ret <4 x float> %4
283}
284
285; This test checks that when splatting an element from a vector into another,
286; the value isn't moved out to GPRs first.
287define <4 x i32> @tduplane(<4 x i32> %invec) {
288;CHECK-LABEL: tduplane:
289;CHECK-NOT: vmov {{.*}}, d16[1]
290;CHECK: vdup.32 {{.*}}, d16[1]
291  %in = extractelement <4 x i32> %invec, i32 1
292  %1 = insertelement <4 x i32> undef, i32 %in, i32 0
293  %2 = insertelement <4 x i32> %1, i32 %in, i32 1
294  %3 = insertelement <4 x i32> %2, i32 %in, i32 2
295  %4 = insertelement <4 x i32> %3, i32 255, i32 3
296  ret <4 x i32> %4
297}
298
299define <2 x float> @check_f32(<4 x float> %v) nounwind {
300;CHECK-LABEL: check_f32:
301;CHECK: vdup.32 {{.*}}, d{{..}}[1]
302  %x = extractelement <4 x float> %v, i32 3
303  %1 = insertelement  <2 x float> undef, float %x, i32 0
304  %2 = insertelement  <2 x float> %1, float %x, i32 1
305  ret <2 x float> %2
306}
307
308define <2 x i32> @check_i32(<4 x i32> %v) nounwind {
309;CHECK-LABEL: check_i32:
310;CHECK: vdup.32 {{.*}}, d{{..}}[1]
311  %x = extractelement <4 x i32> %v, i32 3
312  %1 = insertelement  <2 x i32> undef, i32 %x, i32 0
313  %2 = insertelement  <2 x i32> %1, i32 %x, i32 1
314  ret <2 x i32> %2
315}
316
317define <4 x i16> @check_i16(<8 x i16> %v) nounwind {
318;CHECK-LABEL: check_i16:
319;CHECK: vdup.16 {{.*}}, d{{..}}[3]
320  %x = extractelement <8 x i16> %v, i32 3
321  %1 = insertelement  <4 x i16> undef, i16 %x, i32 0
322  %2 = insertelement  <4 x i16> %1, i16 %x, i32 1
323  ret <4 x i16> %2
324}
325
326define <8 x i8> @check_i8(<16 x i8> %v) nounwind {
327;CHECK-LABEL: check_i8:
328;CHECK: vdup.8 {{.*}}, d{{..}}[3]
329  %x = extractelement <16 x i8> %v, i32 3
330  %1 = insertelement  <8  x i8> undef, i8 %x, i32 0
331  %2 = insertelement  <8  x i8> %1, i8 %x, i32 1
332  ret <8 x i8> %2
333}
334