1; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
2; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
3
; Reverses the sixteen byte lanes of the vector loaded from %a (the second
; shuffle operand is undef) and stores the result to %c. The directives expect
; a vector load addressed through %lo (presumably the shuffle-control constant)
; followed by a vshf.b whose two data operands are both the loaded vector.
; %b is not used.
4define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
5  ; CHECK: vshf_v16i8_0:
6
7  %1 = load <16 x i8>* %a
8  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
9  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
10  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
11  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
12  store <16 x i8> %2, <16 x i8>* %c
13  ; CHECK-DAG: st.b [[R3]], 0($4)
14
15  ret void
16  ; CHECK: .size vshf_v16i8_0
17}
18
; Broadcasts byte lane 1 of the vector loaded from %a into every lane and
; stores the result to %c. The directives expect splati.b with index 1 instead
; of a general vshf.b. %b is not used.
19define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
20  ; CHECK: vshf_v16i8_1:
21
22  %1 = load <16 x i8>* %a
23  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
24  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
25  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
26  store <16 x i8> %2, <16 x i8>* %c
27  ; CHECK-DAG: st.b [[R3]], 0($4)
28
29  ret void
30  ; CHECK: .size vshf_v16i8_1
31}
32
; Every mask index is 16 or higher (16..30, then 16 again), so the result is
; built only from the vector loaded from %b; the value loaded from %a is never
; referenced by the shuffle. The directives expect vshf.b with the %b vector as
; both data operands, and no directive mentions a load from %a.
33define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
34  ; CHECK: vshf_v16i8_2:
35
36  %1 = load <16 x i8>* %a
37  %2 = load <16 x i8>* %b
38  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
39  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
40  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
41  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
42  store <16 x i8> %3, <16 x i8>* %c
43  ; CHECK-DAG: st.b [[R3]], 0($4)
44
45  ret void
46  ; CHECK: .size vshf_v16i8_2
47}
48
; Irregular two-operand shuffle: the first eight result lanes use indices 16 and
; above (lanes of the %b vector) and the last eight use indices below 16 (lanes
; of the %a vector). The directives expect a genuine two-source vshf.b taking
; the %a vector and the %b vector as its data operands.
49define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
50  ; CHECK: vshf_v16i8_3:
51
52  %1 = load <16 x i8>* %a
53  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
54  %2 = load <16 x i8>* %b
55  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
56  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
57  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], %lo
58  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R2]]
59  store <16 x i8> %3, <16 x i8>* %c
60  ; CHECK-DAG: st.b [[R3]], 0($4)
61
62  ret void
63  ; CHECK: .size vshf_v16i8_3
64}
65
; Passes the same loaded vector as both shuffle operands, so mask indices 1 and
; 17 both name lane 1 of that vector. The directives expect the alternating
; mask to be recognised as a broadcast and lowered to splati.b with index 1.
; %b is not used.
66define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
67  ; CHECK: vshf_v16i8_4:
68
69  %1 = load <16 x i8>* %a
70  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
71  %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
72  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
73  store <16 x i8> %2, <16 x i8>* %c
74  ; CHECK-DAG: st.b [[R3]], 0($4)
75
76  ret void
77  ; CHECK: .size vshf_v16i8_4
78}
79
; Reverses the eight halfword lanes of the vector loaded from %a (second
; shuffle operand is undef) and stores the result to %c. The directives expect
; a vector load addressed through %lo (presumably the shuffle-control constant)
; followed by vshf.h with the loaded vector as both data operands.
; %b is not used.
80define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
81  ; CHECK: vshf_v8i16_0:
82
83  %1 = load <8 x i16>* %a
84  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
85  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
86  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
87  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
88  store <8 x i16> %2, <8 x i16>* %c
89  ; CHECK-DAG: st.h [[R3]], 0($4)
90
91  ret void
92  ; CHECK: .size vshf_v8i16_0
93}
94
; Broadcasts halfword lane 1 of the vector loaded from %a into every lane and
; stores the result to %c. The directives expect splati.h with index 1 instead
; of a general vshf.h. %b is not used.
95define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
96  ; CHECK: vshf_v8i16_1:
97
98  %1 = load <8 x i16>* %a
99  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
100  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
101  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
102  store <8 x i16> %2, <8 x i16>* %c
103  ; CHECK-DAG: st.h [[R3]], 0($4)
104
105  ret void
106  ; CHECK: .size vshf_v8i16_1
107}
108
; Every mask index is 8 or higher (8..14, then 8 again), so the result is built
; only from the vector loaded from %b; the value loaded from %a is never
; referenced by the shuffle. The directives expect vshf.h with the %b vector as
; both data operands, and no directive mentions a load from %a.
109define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
110  ; CHECK: vshf_v8i16_2:
111
112  %1 = load <8 x i16>* %a
113  %2 = load <8 x i16>* %b
114  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
115  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
116  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
117  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
118  store <8 x i16> %3, <8 x i16>* %c
119  ; CHECK-DAG: st.h [[R3]], 0($4)
120
121  ret void
122  ; CHECK: .size vshf_v8i16_2
123}
124
; Irregular two-operand shuffle: mask indices below 8 pick halfword lanes of the
; %a vector and indices 8 and above pick lanes of the %b vector. The directives
; expect a genuine two-source vshf.h taking the %a vector and the %b vector as
; its data operands.
125define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
126  ; CHECK: vshf_v8i16_3:
127
128  %1 = load <8 x i16>* %a
129  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
130  %2 = load <8 x i16>* %b
131  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
132  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
133  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], %lo
134  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R2]]
135  store <8 x i16> %3, <8 x i16>* %c
136  ; CHECK-DAG: st.h [[R3]], 0($4)
137
138  ret void
139  ; CHECK: .size vshf_v8i16_3
140}
141
; Passes the same loaded vector as both shuffle operands, so mask indices 1 and
; 9 both name lane 1 of that vector. The directives expect the alternating mask
; to be recognised as a broadcast and lowered to splati.h with index 1.
; %b is not used.
142define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
143  ; CHECK: vshf_v8i16_4:
144
145  %1 = load <8 x i16>* %a
146  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
147  %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
148  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
149  store <8 x i16> %2, <8 x i16>* %c
150  ; CHECK-DAG: st.h [[R3]], 0($4)
151
152  ret void
153  ; CHECK: .size vshf_v8i16_4
154}
155
156; Note: a v4i32 vector holds only a single group of four elements, so every
157; single-vector shuffle fits the shf.w immediate form and vshf.w is never selected for it.
158
; Reverses the four word lanes of the vector loaded from %a and stores the
; result to %c. The directives expect shf.w with immediate 27 rather than
; vshf.w; 27 is 0b00011011, i.e. the mask 3,2,1,0 packed two bits per result
; lane with lane 0 in the low bits. %b is not used.
159define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
160  ; CHECK: vshf_v4i32_0:
161
162  %1 = load <4 x i32>* %a
163  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
164  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
165  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
166  store <4 x i32> %2, <4 x i32>* %c
167  ; CHECK-DAG: st.w [[R3]], 0($4)
168
169  ret void
170  ; CHECK: .size vshf_v4i32_0
171}
172
; Broadcasts word lane 1 of the vector loaded from %a into every lane and
; stores the result to %c. The directives expect shf.w with immediate 85
; (0b01010101, lane 1 selected for all four result lanes) rather than a
; splati.w or vshf.w. %b is not used.
173define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
174  ; CHECK: vshf_v4i32_1:
175
176  %1 = load <4 x i32>* %a
177  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
178  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
179  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
180  store <4 x i32> %2, <4 x i32>* %c
181  ; CHECK-DAG: st.w [[R3]], 0($4)
182
183  ret void
184  ; CHECK: .size vshf_v4i32_1
185}
186
; Every mask index is 4 or higher (4,5,6,4), so the result uses only lanes
; 0,1,2,0 of the vector loaded from %b; the value loaded from %a is never
; referenced by the shuffle. The directives expect shf.w on the %b vector with
; immediate 36 (0b00100100, two bits per result lane, lane 0 in the low bits).
187define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
188  ; CHECK: vshf_v4i32_2:
189
190  %1 = load <4 x i32>* %a
191  %2 = load <4 x i32>* %b
192  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
193  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
194  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
195  store <4 x i32> %3, <4 x i32>* %c
196  ; CHECK-DAG: st.w [[R3]], 0($4)
197
198  ret void
199  ; CHECK: .size vshf_v4i32_2
200}
201
; Two-operand shuffle: index 1 picks a lane of the %a vector while 5, 6 and 4
; pick lanes of the %b vector. Of the five vshf_v4i32 cases this is the only
; one whose directives expect vshf.w: a vector load addressed through %lo
; (presumably the shuffle-control constant) followed by vshf.w on both vectors.
202define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
203  ; CHECK: vshf_v4i32_3:
204
205  %1 = load <4 x i32>* %a
206  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
207  %2 = load <4 x i32>* %b
208  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
209  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
210  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
211  ; CHECK-DAG: vshf.w [[R3]], [[R1]], [[R2]]
212  store <4 x i32> %3, <4 x i32>* %c
213  ; CHECK-DAG: st.w [[R3]], 0($4)
214
215  ret void
216  ; CHECK: .size vshf_v4i32_3
217}
218
; Passes the same loaded vector as both shuffle operands, so mask indices 1 and
; 5 both name lane 1 of that vector. The directives expect the same lowering as
; a plain lane-1 broadcast: shf.w with immediate 85 (0b01010101).
; %b is not used.
219define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
220  ; CHECK: vshf_v4i32_4:
221
222  %1 = load <4 x i32>* %a
223  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
224  %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
225  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
226  store <4 x i32> %2, <4 x i32>* %c
227  ; CHECK-DAG: st.w [[R3]], 0($4)
228
229  ret void
230  ; CHECK: .size vshf_v4i32_4
231}
232
; Swaps the two doubleword lanes of the vector loaded from %a (second shuffle
; operand is undef) and stores the result to %c. The directives expect a vector
; load addressed through %lo (presumably the shuffle-control constant) followed
; by vshf.d with the loaded vector as both data operands. %b is not used.
233define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
234  ; CHECK: vshf_v2i64_0:
235
236  %1 = load <2 x i64>* %a
237  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
238  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
239  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
240  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
241  store <2 x i64> %2, <2 x i64>* %c
242  ; CHECK-DAG: st.d [[R3]], 0($4)
243
244  ret void
245  ; CHECK: .size vshf_v2i64_0
246}
247
; Broadcasts doubleword lane 1 of the vector loaded from %a into both lanes and
; stores the result to %c. The directives expect splati.d with index 1 instead
; of a general vshf.d. %b is not used.
248define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
249  ; CHECK: vshf_v2i64_1:
250
251  %1 = load <2 x i64>* %a
252  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
253  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
254  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
255  store <2 x i64> %2, <2 x i64>* %c
256  ; CHECK-DAG: st.d [[R3]], 0($4)
257
258  ret void
259  ; CHECK: .size vshf_v2i64_1
260}
261
; Both mask indices (3, 2) are 2 or higher, so the result is the %b vector with
; its two lanes swapped; the value loaded from %a is never referenced by the
; shuffle. The directives expect vshf.d with the %b vector as both data
; operands, and no directive mentions a load from %a.
262define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
263  ; CHECK: vshf_v2i64_2:
264
265  %1 = load <2 x i64>* %a
266  %2 = load <2 x i64>* %b
267  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
268  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
269  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
270  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
271  store <2 x i64> %3, <2 x i64>* %c
272  ; CHECK-DAG: st.d [[R3]], 0($4)
273
274  ret void
275  ; CHECK: .size vshf_v2i64_2
276}
277
; Two-operand shuffle: index 1 picks lane 1 of the %a vector and index 2 picks
; lane 0 of the %b vector. The directives expect a genuine two-source vshf.d
; taking the %a vector and the %b vector as its data operands.
278define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
279  ; CHECK: vshf_v2i64_3:
280
281  %1 = load <2 x i64>* %a
282  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
283  %2 = load <2 x i64>* %b
284  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
285  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
286  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo
287  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R2]]
288  store <2 x i64> %3, <2 x i64>* %c
289  ; CHECK-DAG: st.d [[R3]], 0($4)
290
291  ret void
292  ; CHECK: .size vshf_v2i64_3
293}
294
; Passes the same loaded vector as both shuffle operands, so mask indices 1 and
; 3 both name lane 1 of that vector. The directives expect the mask to be
; recognised as a broadcast and lowered to splati.d with index 1.
; %b is not used.
295define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
296  ; CHECK: vshf_v2i64_4:
297
298  %1 = load <2 x i64>* %a
299  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
300  %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
301  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
302  store <2 x i64> %2, <2 x i64>* %c
303  ; CHECK-DAG: st.d [[R3]], 0($4)
304
305  ret void
306  ; CHECK: .size vshf_v2i64_4
307}
308
; Applies the same permutation (1,3,2,0) inside each consecutive group of four
; byte lanes of the vector loaded from %a. The directives expect a single shf.b
; with immediate 45 (0b00101101: 1,3,2,0 packed two bits per lane, lane 0 in the
; low bits). %b is not used.
309define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
310  ; CHECK: shf_v16i8_0:
311
312  %1 = load <16 x i8>* %a
313  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
314  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
315  ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
316  store <16 x i8> %2, <16 x i8>* %c
317  ; CHECK-DAG: st.b [[R3]], 0($4)
318
319  ret void
320  ; CHECK: .size shf_v16i8_0
321}
322
; Reverses the lanes inside each consecutive group of four halfwords of the
; vector loaded from %a (3,2,1,0 then 7,6,5,4). The directives expect a single
; shf.h with immediate 27 (0b00011011: 3,2,1,0 packed two bits per lane, lane 0
; in the low bits). %b is not used.
323define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
324  ; CHECK: shf_v8i16_0:
325
326  %1 = load <8 x i16>* %a
327  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
328  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
329  ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
330  store <8 x i16> %2, <8 x i16>* %c
331  ; CHECK-DAG: st.h [[R3]], 0($4)
332
333  ret void
334  ; CHECK: .size shf_v8i16_0
335}
336
; Reverses the four word lanes of the vector loaded from %a. The directives
; expect a single shf.w with immediate 27 (0b00011011: 3,2,1,0 packed two bits
; per lane, lane 0 in the low bits). %b is not used.
337define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
338  ; CHECK: shf_v4i32_0:
339
340  %1 = load <4 x i32>* %a
341  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
342  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
343  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
344  store <4 x i32> %2, <4 x i32>* %c
345  ; CHECK-DAG: st.w [[R3]], 0($4)
346
347  ret void
348  ; CHECK: .size shf_v4i32_0
349}
350
351; shf.d does not exist
352
; Interleaves the even-indexed byte lanes of the two loaded vectors: the mask
; alternates lane 2k of the %a vector with lane 2k of the %b vector. The
; directives expect a single ilvev.b taking the %a vector then the %b vector.
353define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
354  ; CHECK: ilvev_v16i8_0:
355
356  %1 = load <16 x i8>* %a
357  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
358  %2 = load <16 x i8>* %b
359  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
360  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
361                     <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
362  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
363  store <16 x i8> %3, <16 x i8>* %c
364  ; CHECK-DAG: st.b [[R3]], 0($4)
365
366  ret void
367  ; CHECK: .size ilvev_v16i8_0
368}
369
; Interleaves the even-indexed halfword lanes of the two loaded vectors: the
; mask alternates lane 2k of the %a vector with lane 2k of the %b vector. The
; directives expect a single ilvev.h taking the %a vector then the %b vector.
370define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
371  ; CHECK: ilvev_v8i16_0:
372
373  %1 = load <8 x i16>* %a
374  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
375  %2 = load <8 x i16>* %b
376  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
377  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
378  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
379  store <8 x i16> %3, <8 x i16>* %c
380  ; CHECK-DAG: st.h [[R3]], 0($4)
381
382  ret void
383  ; CHECK: .size ilvev_v8i16_0
384}
385
; Interleaves the even-indexed word lanes of the two loaded vectors (a0, b0,
; a2, b2 in mask terms). The directives expect a single ilvev.w taking the %a
; vector then the %b vector.
386define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
387  ; CHECK: ilvev_v4i32_0:
388
389  %1 = load <4 x i32>* %a
390  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
391  %2 = load <4 x i32>* %b
392  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
393  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
394  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
395  store <4 x i32> %3, <4 x i32>* %c
396  ; CHECK-DAG: st.w [[R3]], 0($4)
397
398  ret void
399  ; CHECK: .size ilvev_v4i32_0
400}
401
; Combines lane 0 of the %a vector with lane 0 of the %b vector (mask 0, 2).
; The directives expect a single ilvev.d taking the %a vector then the %b
; vector.
402define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
403  ; CHECK: ilvev_v2i64_0:
404
405  %1 = load <2 x i64>* %a
406  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
407  %2 = load <2 x i64>* %b
408  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
409  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
410  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
411  store <2 x i64> %3, <2 x i64>* %c
412  ; CHECK-DAG: st.d [[R3]], 0($4)
413
414  ret void
415  ; CHECK: .size ilvev_v2i64_0
416}
417
; Interleaves the odd-indexed byte lanes of the two loaded vectors: the mask
; alternates lane 2k+1 of the %a vector with lane 2k+1 of the %b vector. The
; directives expect a single ilvod.b taking the %a vector then the %b vector.
418define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
419  ; CHECK: ilvod_v16i8_0:
420
421  %1 = load <16 x i8>* %a
422  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
423  %2 = load <16 x i8>* %b
424  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
425  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
426                     <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
427  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
428  store <16 x i8> %3, <16 x i8>* %c
429  ; CHECK-DAG: st.b [[R3]], 0($4)
430
431  ret void
432  ; CHECK: .size ilvod_v16i8_0
433}
434
; Interleaves the odd-indexed halfword lanes of the two loaded vectors: the
; mask alternates lane 2k+1 of the %a vector with lane 2k+1 of the %b vector.
; The directives expect a single ilvod.h taking the %a vector then the %b
; vector.
435define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
436  ; CHECK: ilvod_v8i16_0:
437
438  %1 = load <8 x i16>* %a
439  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
440  %2 = load <8 x i16>* %b
441  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
442  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
443  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
444  store <8 x i16> %3, <8 x i16>* %c
445  ; CHECK-DAG: st.h [[R3]], 0($4)
446
447  ret void
448  ; CHECK: .size ilvod_v8i16_0
449}
450
; Interleaves the odd-indexed word lanes of the two loaded vectors (a1, b1, a3,
; b3 in mask terms). The directives expect a single ilvod.w taking the %a
; vector then the %b vector.
451define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
452  ; CHECK: ilvod_v4i32_0:
453
454  %1 = load <4 x i32>* %a
455  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
456  %2 = load <4 x i32>* %b
457  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
458  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
459  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
460  store <4 x i32> %3, <4 x i32>* %c
461  ; CHECK-DAG: st.w [[R3]], 0($4)
462
463  ret void
464  ; CHECK: .size ilvod_v4i32_0
465}
466
; Combines lane 1 of the %a vector with lane 1 of the %b vector (mask 1, 3).
; The directives expect a single ilvod.d taking the %a vector then the %b
; vector.
467define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
468  ; CHECK: ilvod_v2i64_0:
469
470  %1 = load <2 x i64>* %a
471  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
472  %2 = load <2 x i64>* %b
473  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
474  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
475  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
476  store <2 x i64> %3, <2 x i64>* %c
477  ; CHECK-DAG: st.d [[R3]], 0($4)
478
479  ret void
480  ; CHECK: .size ilvod_v2i64_0
481}
482
; Interleaves byte lanes 0..7 of the two loaded vectors: the mask alternates
; lane k of the %a vector with lane k of the %b vector for k = 0..7. The
; directives expect a single ilvl.b taking the %a vector then the %b vector.
483define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
484  ; CHECK: ilvl_v16i8_0:
485
486  %1 = load <16 x i8>* %a
487  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
488  %2 = load <16 x i8>* %b
489  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
490  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
491                     <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
492  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
493  store <16 x i8> %3, <16 x i8>* %c
494  ; CHECK-DAG: st.b [[R3]], 0($4)
495
496  ret void
497  ; CHECK: .size ilvl_v16i8_0
498}
499
; Interleaves halfword lanes 0..3 of the two loaded vectors: the mask
; alternates lane k of the %a vector with lane k of the %b vector for k = 0..3.
; The directives expect a single ilvl.h taking the %a vector then the %b
; vector.
500define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
501  ; CHECK: ilvl_v8i16_0:
502
503  %1 = load <8 x i16>* %a
504  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
505  %2 = load <8 x i16>* %b
506  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
507  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
508  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
509  store <8 x i16> %3, <8 x i16>* %c
510  ; CHECK-DAG: st.h [[R3]], 0($4)
511
512  ret void
513  ; CHECK: .size ilvl_v8i16_0
514}
515
; Interleaves word lanes 0 and 1 of the two loaded vectors (a0, b0, a1, b1 in
; mask terms). The directives expect a single ilvl.w taking the %a vector then
; the %b vector.
516define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
517  ; CHECK: ilvl_v4i32_0:
518
519  %1 = load <4 x i32>* %a
520  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
521  %2 = load <4 x i32>* %b
522  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
523  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
524  ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
525  store <4 x i32> %3, <4 x i32>* %c
526  ; CHECK-DAG: st.w [[R3]], 0($4)
527
528  ret void
529  ; CHECK: .size ilvl_v4i32_0
530}
531
; Uses mask 0, 2 (lane 0 of the %a vector, then lane 0 of the %b vector), which
; is the same mask as ilvev_v2i64_0. Accordingly the directives expect ilvev.d
; here rather than ilvl.d.
532define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
533  ; CHECK: ilvl_v2i64_0:
534
535  %1 = load <2 x i64>* %a
536  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
537  %2 = load <2 x i64>* %b
538  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
539  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
540  ; ilvl.d and ilvev.d are equivalent for v2i64
541  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
542  store <2 x i64> %3, <2 x i64>* %c
543  ; CHECK-DAG: st.d [[R3]], 0($4)
544
545  ret void
546  ; CHECK: .size ilvl_v2i64_0
547}
548
; Interleaves byte lanes 8..15 of the two loaded vectors: the mask alternates
; lane k of the %a vector with lane k of the %b vector for k = 8..15. The
; directives expect a single ilvr.b taking the %a vector then the %b vector.
549define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
550  ; CHECK: ilvr_v16i8_0:
551
552  %1 = load <16 x i8>* %a
553  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
554  %2 = load <16 x i8>* %b
555  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
556  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
557                     <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
558  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
559  store <16 x i8> %3, <16 x i8>* %c
560  ; CHECK-DAG: st.b [[R3]], 0($4)
561
562  ret void
563  ; CHECK: .size ilvr_v16i8_0
564}
565
; Interleaves halfword lanes 4..7 of the two loaded vectors: the mask
; alternates lane k of the %a vector with lane k of the %b vector for k = 4..7.
; The directives expect a single ilvr.h taking the %a vector then the %b
; vector.
566define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
567  ; CHECK: ilvr_v8i16_0:
568
569  %1 = load <8 x i16>* %a
570  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
571  %2 = load <8 x i16>* %b
572  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
573  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
574  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
575  store <8 x i16> %3, <8 x i16>* %c
576  ; CHECK-DAG: st.h [[R3]], 0($4)
577
578  ret void
579  ; CHECK: .size ilvr_v8i16_0
580}
581
; Interleaves word lanes 2 and 3 of the two loaded vectors (a2, b2, a3, b3 in
; mask terms). The directives expect a single ilvr.w taking the %a vector then
; the %b vector.
582define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
583  ; CHECK: ilvr_v4i32_0:
584
585  %1 = load <4 x i32>* %a
586  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
587  %2 = load <4 x i32>* %b
588  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
589  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
590  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
591  store <4 x i32> %3, <4 x i32>* %c
592  ; CHECK-DAG: st.w [[R3]], 0($4)
593
594  ret void
595  ; CHECK: .size ilvr_v4i32_0
596}
597
; Uses mask 1, 3 (lane 1 of the %a vector, then lane 1 of the %b vector), which
; is the same mask as ilvod_v2i64_0. Accordingly the directives expect ilvod.d
; here rather than ilvr.d.
598define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
599  ; CHECK: ilvr_v2i64_0:
600
601  %1 = load <2 x i64>* %a
602  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
603  %2 = load <2 x i64>* %b
604  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
605  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
606  ; ilvr.d and ilvod.d are equivalent for v2i64
607  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
608  store <2 x i64> %3, <2 x i64>* %c
609  ; CHECK-DAG: st.d [[R3]], 0($4)
610
611  ret void
612  ; CHECK: .size ilvr_v2i64_0
613}
614
; Packs the even-indexed byte lanes: the first eight result lanes are lanes
; 0,2,..,14 of the %a vector and the last eight are lanes 0,2,..,14 of the %b
; vector (mask indices 16,18,..,30). The directives expect a single pckev.b
; taking the %a vector then the %b vector.
615define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
616  ; CHECK: pckev_v16i8_0:
617
618  %1 = load <16 x i8>* %a
619  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
620  %2 = load <16 x i8>* %b
621  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
622  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
623                     <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
624  ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
625  store <16 x i8> %3, <16 x i8>* %c
626  ; CHECK-DAG: st.b [[R3]], 0($4)
627
628  ret void
629  ; CHECK: .size pckev_v16i8_0
630}
631
; Packs the even-indexed halfword lanes: the first four result lanes are lanes
; 0,2,4,6 of the %a vector and the last four are lanes 0,2,4,6 of the %b vector
; (mask indices 8,10,12,14). The directives expect a single pckev.h taking the
; %a vector then the %b vector.
632define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
633  ; CHECK: pckev_v8i16_0:
634
635  %1 = load <8 x i16>* %a
636  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
637  %2 = load <8 x i16>* %b
638  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
639  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
640  ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
641  store <8 x i16> %3, <8 x i16>* %c
642  ; CHECK-DAG: st.h [[R3]], 0($4)
643
644  ret void
645  ; CHECK: .size pckev_v8i16_0
646}
647
; Packs the even-indexed word lanes: lanes 0 and 2 of the %a vector followed by
; lanes 0 and 2 of the %b vector (mask indices 4 and 6). The directives expect
; a single pckev.w taking the %a vector then the %b vector.
648define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
649  ; CHECK: pckev_v4i32_0:
650
651  %1 = load <4 x i32>* %a
652  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
653  %2 = load <4 x i32>* %b
654  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
655  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
656  ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
657  store <4 x i32> %3, <4 x i32>* %c
658  ; CHECK-DAG: st.w [[R3]], 0($4)
659
660  ret void
661  ; CHECK: .size pckev_v4i32_0
662}
663
; Uses mask 0, 2 (lane 0 of the %a vector, then lane 0 of the %b vector), which
; is the same mask as ilvev_v2i64_0. Accordingly the directives expect ilvev.d
; here rather than pckev.d.
664define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
665  ; CHECK: pckev_v2i64_0:
666
667  %1 = load <2 x i64>* %a
668  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
669  %2 = load <2 x i64>* %b
670  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
671  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
672  ; pckev.d and ilvev.d are equivalent for v2i64
673  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
674  store <2 x i64> %3, <2 x i64>* %c
675  ; CHECK-DAG: st.d [[R3]], 0($4)
676
677  ret void
678  ; CHECK: .size pckev_v2i64_0
679}
680
; Packs the odd-indexed byte lanes: the first eight result lanes are lanes
; 1,3,..,15 of the %a vector and the last eight are lanes 1,3,..,15 of the %b
; vector (mask indices 17,19,..,31). The directives expect a single pckod.b
; taking the %a vector then the %b vector.
681define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
682  ; CHECK: pckod_v16i8_0:
683
684  %1 = load <16 x i8>* %a
685  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
686  %2 = load <16 x i8>* %b
687  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
688  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
689                     <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
690  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
691  store <16 x i8> %3, <16 x i8>* %c
692  ; CHECK-DAG: st.b [[R3]], 0($4)
693
694  ret void
695  ; CHECK: .size pckod_v16i8_0
696}
697
; Packs the odd-indexed halfword lanes: the first four result lanes are lanes
; 1,3,5,7 of the %a vector and the last four are lanes 1,3,5,7 of the %b vector
; (mask indices 9,11,13,15). The directives expect a single pckod.h taking the
; %a vector then the %b vector.
698define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
699  ; CHECK: pckod_v8i16_0:
700
701  %1 = load <8 x i16>* %a
702  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
703  %2 = load <8 x i16>* %b
704  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
705  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
706  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
707  store <8 x i16> %3, <8 x i16>* %c
708  ; CHECK-DAG: st.h [[R3]], 0($4)
709
710  ret void
711  ; CHECK: .size pckod_v8i16_0
712}
713
; Packs the odd-indexed word lanes: lanes 1 and 3 of the %a vector followed by
; lanes 1 and 3 of the %b vector (mask indices 5 and 7). The directives expect
; a single pckod.w taking the %a vector then the %b vector.
714define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
715  ; CHECK: pckod_v4i32_0:
716
717  %1 = load <4 x i32>* %a
718  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
719  %2 = load <4 x i32>* %b
720  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
721  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
722  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
723  store <4 x i32> %3, <4 x i32>* %c
724  ; CHECK-DAG: st.w [[R3]], 0($4)
725
726  ret void
727  ; CHECK: .size pckod_v4i32_0
728}
729
; Uses mask 1, 3 (lane 1 of the %a vector, then lane 1 of the %b vector), which
; is the same mask as ilvod_v2i64_0. Accordingly the directives expect ilvod.d
; here rather than pckod.d.
730define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
731  ; CHECK: pckod_v2i64_0:
732
733  %1 = load <2 x i64>* %a
734  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
735  %2 = load <2 x i64>* %b
736  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
737  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
738  ; pckod.d and ilvod.d are equivalent for v2i64
739  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
740  store <2 x i64> %3, <2 x i64>* %c
741  ; CHECK-DAG: st.d [[R3]], 0($4)
742
743  ret void
744  ; CHECK: .size pckod_v2i64_0
745}
746
; Broadcasts byte lane 4 of the vector loaded from %a into all sixteen lanes
; and stores the result to %c. The directives expect splati.b with index 4.
; Unlike the earlier tests in this file, this function takes no %b argument.
747define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
748  ; CHECK: splati_v16i8_0:
749
750  %1 = load <16 x i8>* %a
751  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
752  %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
753                     <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
754  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
755  store <16 x i8> %2, <16 x i8>* %c
756  ; CHECK-DAG: st.b [[R3]], 0($4)
757
758  ret void
759  ; CHECK: .size splati_v16i8_0
760}
761
; Broadcasts halfword lane 4 of the vector loaded from %a into all eight lanes
; and stores the result to %c. The directives expect splati.h with index 4.
; This function takes no %b argument.
762define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
763  ; CHECK: splati_v8i16_0:
764
765  %1 = load <8 x i16>* %a
766  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
767  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
768  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
769  store <8 x i16> %2, <8 x i16>* %c
770  ; CHECK-DAG: st.h [[R3]], 0($4)
771
772  ret void
773  ; CHECK: .size splati_v8i16_0
774}
775
; Broadcasts word lane 3 of the vector loaded from %a into all four lanes and
; stores the result to %c. The directives expect shf.w with immediate 255
; (0b11111111, lane 3 selected for every result lane) instead of splati.w.
; This function takes no %b argument.
776define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
777  ; CHECK: splati_v4i32_0:
778
779  %1 = load <4 x i32>* %a
780  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
781  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
782  ; shf.w and splati.w are equivalent
783  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
784  store <4 x i32> %2, <4 x i32>* %c
785  ; CHECK-DAG: st.w [[R3]], 0($4)
786
787  ret void
788  ; CHECK: .size splati_v4i32_0
789}
790
; Broadcasts doubleword lane 1 of the vector loaded from %a into both lanes and
; stores the result to %c. The directives expect splati.d with index 1.
; This function takes no %b argument.
791define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
792  ; CHECK: splati_v2i64_0:
793
794  %1 = load <2 x i64>* %a
795  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
796  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
797  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
798  store <2 x i64> %2, <2 x i64>* %c
799  ; CHECK-DAG: st.d [[R3]], 0($4)
800
801  ret void
802  ; CHECK: .size splati_v2i64_0
803}
804