1; RUN: llc -march=mips -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s
2; RUN: llc -march=mipsel -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s
3
; Reverses the 16 bytes loaded from %a (second shuffle operand is undef) and
; stores the result to %c. The expectations below want a control vector loaded
; through an address built with a %lo relocation (presumably a constant-pool
; entry; not visible here), then vshf.b with the %a vector as both data operands.
4define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
5  ; CHECK-LABEL: vshf_v16i8_0:
6
7  %1 = load <16 x i8>, <16 x i8>* %a
8  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
9  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
10  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
11  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
12  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
13  store <16 x i8> %2, <16 x i8>* %c
14  ; CHECK-DAG: st.b [[R3]], 0($4)
15
16  ret void
17}
18
; Broadcasts byte 1 of %a to every lane (all mask indices are 1) and stores the
; result to %c. Expected to be selected as splati.b with element index 1.
19define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
20  ; CHECK-LABEL: vshf_v16i8_1:
21
22  %1 = load <16 x i8>, <16 x i8>* %a
23  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
24  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
25  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
26  store <16 x i8> %2, <16 x i8>* %c
27  ; CHECK-DAG: st.b [[R3]], 0($4)
28
29  ret void
30}
31
; Two-operand shuffle whose mask (16..30, then 16) only reads elements of %b.
; Only the %b load is matched below; vshf.b is expected with the %b vector as
; both data operands and a control vector loaded through a %lo-based address.
32define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
33  ; CHECK-LABEL: vshf_v16i8_2:
34
35  %1 = load <16 x i8>, <16 x i8>* %a
36  %2 = load <16 x i8>, <16 x i8>* %b
37  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
38  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
39  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
40  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
41  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
42  store <16 x i8> %3, <16 x i8>* %c
43  ; CHECK-DAG: st.b [[R3]], 0($4)
44
45  ret void
46}
47
; Two-operand shuffle whose mask mixes elements of %a (indices < 16) and %b
; (indices >= 16). Both loads are matched, and vshf.b is expected with the %b
; vector listed before the %a vector (see the operand-order note in the body).
48define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
49  ; CHECK-LABEL: vshf_v16i8_3:
50
51  %1 = load <16 x i8>, <16 x i8>* %a
52  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
53  %2 = load <16 x i8>, <16 x i8>* %b
54  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
55  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
56  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
57  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
58  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
59  ; the operands to get the right answer.
60  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R1]]
61  store <16 x i8> %3, <16 x i8>* %c
62  ; CHECK-DAG: st.b [[R3]], 0($4)
63
64  ret void
65}
66
; Both shuffle operands are the same value %1, so mask indices 1 and 17 name
; the same byte. Expected to fold to splati.b with element index 1.
67define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
68  ; CHECK-LABEL: vshf_v16i8_4:
69
70  %1 = load <16 x i8>, <16 x i8>* %a
71  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
72  %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
73  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
74  store <16 x i8> %2, <16 x i8>* %c
75  ; CHECK-DAG: st.b [[R3]], 0($4)
76
77  ret void
78}
79
; Reverses the 8 halfwords loaded from %a (second shuffle operand is undef).
; Expected: a control vector loaded through a %lo-based address (presumably a
; constant-pool entry), then vshf.h with the %a vector as both data operands.
80define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
81  ; CHECK-LABEL: vshf_v8i16_0:
82
83  %1 = load <8 x i16>, <8 x i16>* %a
84  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
85  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
86  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
87  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
88  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
89  store <8 x i16> %2, <8 x i16>* %c
90  ; CHECK-DAG: st.h [[R3]], 0($4)
91
92  ret void
93}
94
; Broadcasts halfword 1 of %a to every lane (all mask indices are 1).
; Expected to be selected as splati.h with element index 1.
95define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
96  ; CHECK-LABEL: vshf_v8i16_1:
97
98  %1 = load <8 x i16>, <8 x i16>* %a
99  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
100  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
101  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
102  store <8 x i16> %2, <8 x i16>* %c
103  ; CHECK-DAG: st.h [[R3]], 0($4)
104
105  ret void
106}
107
; Two-operand shuffle whose mask (8..14, then 8) only reads elements of %b.
; Only the %b load is matched below; vshf.h is expected with the %b vector as
; both data operands and a control vector loaded through a %lo-based address.
108define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
109  ; CHECK-LABEL: vshf_v8i16_2:
110
111  %1 = load <8 x i16>, <8 x i16>* %a
112  %2 = load <8 x i16>, <8 x i16>* %b
113  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
114  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
115  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
116  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
117  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
118  store <8 x i16> %3, <8 x i16>* %c
119  ; CHECK-DAG: st.h [[R3]], 0($4)
120
121  ret void
122}
123
; Two-operand shuffle whose mask mixes elements of %a (indices < 8) and %b
; (indices >= 8). Both loads are matched, and vshf.h is expected with the %b
; vector listed before the %a vector (see the operand-order note in the body).
124define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
125  ; CHECK-LABEL: vshf_v8i16_3:
126
127  %1 = load <8 x i16>, <8 x i16>* %a
128  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
129  %2 = load <8 x i16>, <8 x i16>* %b
130  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
131  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
132  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
133  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
134  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
135  ; the operands to get the right answer.
136  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R1]]
137  store <8 x i16> %3, <8 x i16>* %c
138  ; CHECK-DAG: st.h [[R3]], 0($4)
139
140  ret void
141}
142
; Both shuffle operands are the same value %1, so mask indices 1 and 9 name the
; same halfword. Expected to fold to splati.h with element index 1.
143define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
144  ; CHECK-LABEL: vshf_v8i16_4:
145
146  %1 = load <8 x i16>, <8 x i16>* %a
147  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
148  %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
149  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
150  store <8 x i16> %2, <8 x i16>* %c
151  ; CHECK-DAG: st.h [[R3]], 0($4)
152
153  ret void
154}
155
156; Note: a v4i32 vector consists of a single 4-element set, so a shuffle of one
157; vector never needs a vshf.w instruction (shf.w or splati.w is selected instead).
158
; Reverses the four words loaded from %a (second shuffle operand is undef).
; Expected to be selected as shf.w with immediate 27 = 0b00011011, which reads
; as two bits per result lane from the low bits up: 3, 2, 1, 0.
159define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
160  ; CHECK-LABEL: vshf_v4i32_0:
161
162  %1 = load <4 x i32>, <4 x i32>* %a
163  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
164  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
165  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
166  store <4 x i32> %2, <4 x i32>* %c
167  ; CHECK-DAG: st.w [[R3]], 0($4)
168
169  ret void
170}
171
; Broadcasts word 1 of %a to every lane (all mask indices are 1).
; Expected to be selected as splati.w with element index 1.
172define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
173  ; CHECK-LABEL: vshf_v4i32_1:
174
175  %1 = load <4 x i32>, <4 x i32>* %a
176  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
177  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
178  ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][1]
179  store <4 x i32> %2, <4 x i32>* %c
180  ; CHECK-DAG: st.w [[R3]], 0($4)
181
182  ret void
183}
184
; Two-operand shuffle whose mask <4, 5, 6, 4> only reads elements of %b (lanes
; 0, 1, 2, 0 of %b). Only the %b load is matched, and shf.w is expected with
; immediate 36 = 0b00100100, i.e. two bits per lane from the low bits up: 0, 1, 2, 0.
185define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
186  ; CHECK-LABEL: vshf_v4i32_2:
187
188  %1 = load <4 x i32>, <4 x i32>* %a
189  %2 = load <4 x i32>, <4 x i32>* %b
190  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
191  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
192  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
193  store <4 x i32> %3, <4 x i32>* %c
194  ; CHECK-DAG: st.w [[R3]], 0($4)
195
196  ret void
197}
198
; Two-operand shuffle whose mask <1, 5, 6, 4> mixes one element of %a with
; three of %b. Both loads are matched, and vshf.w is expected with the %b
; vector listed before the %a vector (see the operand-order note in the body).
199define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
200  ; CHECK-LABEL: vshf_v4i32_3:
201
202  %1 = load <4 x i32>, <4 x i32>* %a
203  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
204  %2 = load <4 x i32>, <4 x i32>* %b
205  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
206  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
207  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
208  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[PTR_A]])
209  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
210  ; the operands to get the right answer.
211  ; CHECK-DAG: vshf.w [[R3]], [[R2]], [[R1]]
212  store <4 x i32> %3, <4 x i32>* %c
213  ; CHECK-DAG: st.w [[R3]], 0($4)
214
215  ret void
216}
217
; Both shuffle operands are the same value %1 and the mask is <1, 5, 5, 1>, so
; every lane selects word 1. Expected to fold to splati.w with element index 1.
218define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
219  ; CHECK-LABEL: vshf_v4i32_4:
220
221  %1 = load <4 x i32>, <4 x i32>* %a
222  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
223  %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
224  ; The two operand vectors are the same so element 1 and 5 are equivalent.
225  ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][1]
226  store <4 x i32> %2, <4 x i32>* %c
227  ; CHECK-DAG: st.w [[R3]], 0($4)
228
229  ret void
230}
231
; Swaps the two doublewords loaded from %a (mask <1, 0>, second operand undef).
; Expected: a control vector loaded through a %lo-based address (presumably a
; constant-pool entry), then vshf.d with the %a vector as both data operands.
232define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
233  ; CHECK-LABEL: vshf_v2i64_0:
234
235  %1 = load <2 x i64>, <2 x i64>* %a
236  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
237  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
238  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
239  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
240  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
241  store <2 x i64> %2, <2 x i64>* %c
242  ; CHECK-DAG: st.d [[R3]], 0($4)
243
244  ret void
245}
246
; Broadcasts doubleword 1 of %a to both lanes (mask <1, 1>).
; Expected to be selected as splati.d with element index 1.
247define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
248  ; CHECK-LABEL: vshf_v2i64_1:
249
250  %1 = load <2 x i64>, <2 x i64>* %a
251  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
252  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
253  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
254  store <2 x i64> %2, <2 x i64>* %c
255  ; CHECK-DAG: st.d [[R3]], 0($4)
256
257  ret void
258}
259
; Two-operand shuffle whose mask <3, 2> swaps the two doublewords of %b and
; never reads %a. Only the %b load is matched; vshf.d is expected with the %b
; vector as both data operands and a control vector loaded via a %lo address.
260define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
261  ; CHECK-LABEL: vshf_v2i64_2:
262
263  %1 = load <2 x i64>, <2 x i64>* %a
264  %2 = load <2 x i64>, <2 x i64>* %b
265  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
266  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
267  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
268  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
269  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
270  store <2 x i64> %3, <2 x i64>* %c
271  ; CHECK-DAG: st.d [[R3]], 0($4)
272
273  ret void
274}
275
; Two-operand shuffle with mask <1, 2>: doubleword 1 of %a followed by
; doubleword 0 of %b. Both loads are matched, and vshf.d is expected with the
; %b vector listed before the %a vector (see the operand-order note in the body).
276define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
277  ; CHECK-LABEL: vshf_v2i64_3:
278
279  %1 = load <2 x i64>, <2 x i64>* %a
280  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
281  %2 = load <2 x i64>, <2 x i64>* %b
282  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
283  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
284  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
285  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
286  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
287  ; the operands to get the right answer.
288  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R1]]
289  store <2 x i64> %3, <2 x i64>* %c
290  ; CHECK-DAG: st.d [[R3]], 0($4)
291
292  ret void
293}
294
; Both shuffle operands are the same value %1, so mask indices 1 and 3 name the
; same doubleword. Expected to fold to splati.d with element index 1.
295define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
296  ; CHECK-LABEL: vshf_v2i64_4:
297
298  %1 = load <2 x i64>, <2 x i64>* %a
299  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
300  %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
301  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
302  store <2 x i64> %2, <2 x i64>* %c
303  ; CHECK-DAG: st.d [[R3]], 0($4)
304
305  ret void
306}
307
; Applies the same permutation <1, 3, 2, 0> inside each group of four bytes of
; %a. Expected to be selected as shf.b with immediate 45 = 0b00101101, which
; reads as two bits per lane from the low bits up: 1, 3, 2, 0.
308define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
309  ; CHECK-LABEL: shf_v16i8_0:
310
311  %1 = load <16 x i8>, <16 x i8>* %a
312  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
313  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
314  ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
315  store <16 x i8> %2, <16 x i8>* %c
316  ; CHECK-DAG: st.b [[R3]], 0($4)
317
318  ret void
319}
320
; Reverses the halfwords inside each group of four (mask 3,2,1,0 then 7,6,5,4).
; Expected to be selected as shf.h with immediate 27 = 0b00011011, which reads
; as two bits per lane from the low bits up: 3, 2, 1, 0.
321define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
322  ; CHECK-LABEL: shf_v8i16_0:
323
324  %1 = load <8 x i16>, <8 x i16>* %a
325  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
326  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
327  ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
328  store <8 x i16> %2, <8 x i16>* %c
329  ; CHECK-DAG: st.h [[R3]], 0($4)
330
331  ret void
332}
333
; Reverses the four words of %a (mask <3, 2, 1, 0>); the IR body matches
; vshf_v4i32_0. Expected to be selected as shf.w with immediate 27 = 0b00011011
; (two bits per lane from the low bits up: 3, 2, 1, 0).
334define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
335  ; CHECK-LABEL: shf_v4i32_0:
336
337  %1 = load <4 x i32>, <4 x i32>* %a
338  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
339  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
340  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
341  store <4 x i32> %2, <4 x i32>* %c
342  ; CHECK-DAG: st.w [[R3]], 0($4)
343
344  ret void
345}
346
347; shf.d does not exist
348
; Interleaves the even-indexed bytes of %a and %b (mask 0, 16, 2, 18, ...).
; Expected to be selected as ilvev.b with the %b vector listed before the %a
; vector.
349define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
350  ; CHECK-LABEL: ilvev_v16i8_0:
351
352  %1 = load <16 x i8>, <16 x i8>* %a
353  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
354  %2 = load <16 x i8>, <16 x i8>* %b
355  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
356  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
357                     <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
358  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
359  store <16 x i8> %3, <16 x i8>* %c
360  ; CHECK-DAG: st.b [[R3]], 0($4)
361
362  ret void
363}
364
; Interleaves the even-indexed halfwords of %a and %b (mask 0, 8, 2, 10, ...).
; Expected to be selected as ilvev.h with the %b vector listed before the %a
; vector.
365define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
366  ; CHECK-LABEL: ilvev_v8i16_0:
367
368  %1 = load <8 x i16>, <8 x i16>* %a
369  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
370  %2 = load <8 x i16>, <8 x i16>* %b
371  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
372  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
373  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
374  store <8 x i16> %3, <8 x i16>* %c
375  ; CHECK-DAG: st.h [[R3]], 0($4)
376
377  ret void
378}
379
; Interleaves the even-indexed words of %a and %b (mask <0, 4, 2, 6>).
; Expected to be selected as ilvev.w with the %b vector listed before the %a
; vector.
380define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
381  ; CHECK-LABEL: ilvev_v4i32_0:
382
383  %1 = load <4 x i32>, <4 x i32>* %a
384  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
385  %2 = load <4 x i32>, <4 x i32>* %b
386  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
387  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
388  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
389  store <4 x i32> %3, <4 x i32>* %c
390  ; CHECK-DAG: st.w [[R3]], 0($4)
391
392  ret void
393}
394
; Combines doubleword 0 of %a with doubleword 0 of %b (mask <0, 2>).
; Expected to be selected as ilvev.d with the %b vector listed before the %a
; vector.
395define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
396  ; CHECK-LABEL: ilvev_v2i64_0:
397
398  %1 = load <2 x i64>, <2 x i64>* %a
399  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
400  %2 = load <2 x i64>, <2 x i64>* %b
401  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
402  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
403  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
404  store <2 x i64> %3, <2 x i64>* %c
405  ; CHECK-DAG: st.d [[R3]], 0($4)
406
407  ret void
408}
409
410; Interleaving one operand with itself.
; Duplicates each even-indexed byte of %a (mask 0, 0, 2, 2, ...); %b is loaded
; but no mask index refers to it. Expected to be selected as ilvev.b with the
; %a vector as both source operands.
411define void @ilvev_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
412  ; CHECK-LABEL: ilvev_v16i8_1:
413
414  %1 = load <16 x i8>, <16 x i8>* %a
415  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
416  %2 = load <16 x i8>, <16 x i8>* %b
417  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
418                     <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
419  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
420  store <16 x i8> %3, <16 x i8>* %c
421  ; CHECK-DAG: st.b [[R3]], 0($4)
422
423  ret void
424}
425
; Duplicates each even-indexed halfword of %a (mask 0, 0, 2, 2, ...); %b is
; loaded but no mask index refers to it. Expected to be selected as ilvev.h
; with the %a vector as both source operands.
426define void @ilvev_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
427  ; CHECK-LABEL: ilvev_v8i16_1:
428
429  %1 = load <8 x i16>, <8 x i16>* %a
430  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
431  %2 = load <8 x i16>, <8 x i16>* %b
432  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
433  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
434  store <8 x i16> %3, <8 x i16>* %c
435  ; CHECK-DAG: st.h [[R3]], 0($4)
436
437  ret void
438}
439
; Duplicates each even-indexed word of %a (mask <0, 0, 2, 2>); %b is loaded but
; no mask index refers to it. Expected to be selected as ilvev.w with the %a
; vector as both source operands.
440define void @ilvev_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
441  ; CHECK-LABEL: ilvev_v4i32_1:
442
443  %1 = load <4 x i32>, <4 x i32>* %a
444  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
445  %2 = load <4 x i32>, <4 x i32>* %b
446  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
447  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
448  store <4 x i32> %3, <4 x i32>* %c
449  ; CHECK-DAG: st.w [[R3]], 0($4)
450
451  ret void
452}
453
; Mask <0, 0> places doubleword 0 of %a in both lanes; %b is loaded but no mask
; index refers to it. Expected to be selected as splati.d with element index 0
; rather than ilvev.d.
454define void @ilvev_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
455  ; CHECK-LABEL: ilvev_v2i64_1:
456
457  %1 = load <2 x i64>, <2 x i64>* %a
458  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
459  %2 = load <2 x i64>, <2 x i64>* %b
460  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
461  ; ilvev.d with two identical operands is equivalent to splati.d
462  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
463  store <2 x i64> %3, <2 x i64>* %c
464  ; CHECK-DAG: st.d [[R3]], 0($4)
465
466  ret void
467}
468
; Duplicates each even-indexed byte of %b (mask 16, 16, 18, 18, ...); %a is
; loaded but no mask index refers to it. Expected to be selected as ilvev.b
; with the %b vector as both source operands.
469define void @ilvev_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
470  ; CHECK-LABEL: ilvev_v16i8_2:
471
472  %1 = load <16 x i8>, <16 x i8>* %a
473  %2 = load <16 x i8>, <16 x i8>* %b
474  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
475  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
476                     <16 x i32> <i32 16, i32 16, i32 18, i32 18, i32 20, i32 20, i32 22, i32 22, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
477  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
478  store <16 x i8> %3, <16 x i8>* %c
479  ; CHECK-DAG: st.b [[R3]], 0($4)
480
481  ret void
482}
483
; Duplicates each even-indexed halfword of %b (mask 8, 8, 10, 10, ...); %a is
; loaded but no mask index refers to it. Expected to be selected as ilvev.h
; with the %b vector as both source operands.
484define void @ilvev_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
485  ; CHECK-LABEL: ilvev_v8i16_2:
486
487  %1 = load <8 x i16>, <8 x i16>* %a
488  %2 = load <8 x i16>, <8 x i16>* %b
489  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
490  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
491  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
492  store <8 x i16> %3, <8 x i16>* %c
493  ; CHECK-DAG: st.h [[R3]], 0($4)
494
495  ret void
496}
497
; Duplicates each even-indexed word of %b (mask <4, 4, 6, 6>); %a is loaded but
; no mask index refers to it. Expected to be selected as ilvev.w with the %b
; vector as both source operands.
498define void @ilvev_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
499  ; CHECK-LABEL: ilvev_v4i32_2:
500
501  %1 = load <4 x i32>, <4 x i32>* %a
502  %2 = load <4 x i32>, <4 x i32>* %b
503  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
504  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 4, i32 6, i32 6>
505  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
506  store <4 x i32> %3, <4 x i32>* %c
507  ; CHECK-DAG: st.w [[R3]], 0($4)
508
509  ret void
510}
511
; Mask <2, 2> places doubleword 0 of %b in both lanes; %a is loaded but no mask
; index refers to it. Expected to be selected as splati.d with element index 0
; rather than ilvev.d.
512define void @ilvev_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
513  ; CHECK-LABEL: ilvev_v2i64_2:
514
515  %1 = load <2 x i64>, <2 x i64>* %a
516  %2 = load <2 x i64>, <2 x i64>* %b
517  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
518  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
519  ; ilvev.d with two identical operands is equivalent to splati.d
520  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
521  store <2 x i64> %3, <2 x i64>* %c
522  ; CHECK-DAG: st.d [[R3]], 0($4)
523
524  ret void
525}
526
; Interleaves the odd-indexed bytes of %a and %b (mask 1, 17, 3, 19, ...).
; Expected to be selected as ilvod.b with the %a vector listed before the %b
; vector.
527define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
528  ; CHECK-LABEL: ilvod_v16i8_0:
529
530  %1 = load <16 x i8>, <16 x i8>* %a
531  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
532  %2 = load <16 x i8>, <16 x i8>* %b
533  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
534  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
535                     <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
536  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
537  store <16 x i8> %3, <16 x i8>* %c
538  ; CHECK-DAG: st.b [[R3]], 0($4)
539
540  ret void
541}
542
; Interleaves the odd-indexed halfwords of %a and %b (mask 1, 9, 3, 11, ...).
; Expected to be selected as ilvod.h with the %a vector listed before the %b
; vector.
543define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
544  ; CHECK-LABEL: ilvod_v8i16_0:
545
546  %1 = load <8 x i16>, <8 x i16>* %a
547  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
548  %2 = load <8 x i16>, <8 x i16>* %b
549  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
550  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
551  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
552  store <8 x i16> %3, <8 x i16>* %c
553  ; CHECK-DAG: st.h [[R3]], 0($4)
554
555  ret void
556}
557
; Interleaves the odd-indexed words of %a and %b (mask <1, 5, 3, 7>).
; Expected to be selected as ilvod.w with the %a vector listed before the %b
; vector.
558define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
559  ; CHECK-LABEL: ilvod_v4i32_0:
560
561  %1 = load <4 x i32>, <4 x i32>* %a
562  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
563  %2 = load <4 x i32>, <4 x i32>* %b
564  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
565  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
566  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
567  store <4 x i32> %3, <4 x i32>* %c
568  ; CHECK-DAG: st.w [[R3]], 0($4)
569
570  ret void
571}
572
; Combines doubleword 1 of %a with doubleword 1 of %b (mask <1, 3>).
; Expected to be selected as ilvod.d with the %a vector listed before the %b
; vector.
573define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
574  ; CHECK-LABEL: ilvod_v2i64_0:
575
576  %1 = load <2 x i64>, <2 x i64>* %a
577  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
578  %2 = load <2 x i64>, <2 x i64>* %b
579  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
580  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
581  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
582  store <2 x i64> %3, <2 x i64>* %c
583  ; CHECK-DAG: st.d [[R3]], 0($4)
584
585  ret void
586}
587
; Duplicates each odd-indexed byte of %a (mask 1, 1, 3, 3, ...); %b is loaded
; but no mask index refers to it. Expected to be selected as ilvod.b with the
; %a vector as both source operands.
588define void @ilvod_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
589  ; CHECK-LABEL: ilvod_v16i8_1:
590
591  %1 = load <16 x i8>, <16 x i8>* %a
592  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
593  %2 = load <16 x i8>, <16 x i8>* %b
594  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
595                     <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
596  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
597  store <16 x i8> %3, <16 x i8>* %c
598  ; CHECK-DAG: st.b [[R3]], 0($4)
599
600  ret void
601}
602
; Duplicates each odd-indexed halfword of %a (mask 1, 1, 3, 3, ...); %b is
; loaded but no mask index refers to it. Expected to be selected as ilvod.h
; with the %a vector as both source operands.
603define void @ilvod_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
604  ; CHECK-LABEL: ilvod_v8i16_1:
605
606  %1 = load <8 x i16>, <8 x i16>* %a
607  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
608  %2 = load <8 x i16>, <8 x i16>* %b
609  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
610  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
611  store <8 x i16> %3, <8 x i16>* %c
612  ; CHECK-DAG: st.h [[R3]], 0($4)
613
614  ret void
615}
616
; Duplicates each odd-indexed word of %a (mask <1, 1, 3, 3>); %b is loaded but
; no mask index refers to it. Expected to be selected as ilvod.w with the %a
; vector as both source operands.
617define void @ilvod_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
618  ; CHECK-LABEL: ilvod_v4i32_1:
619
620  %1 = load <4 x i32>, <4 x i32>* %a
621  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
622  %2 = load <4 x i32>, <4 x i32>* %b
623  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
624  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
625  store <4 x i32> %3, <4 x i32>* %c
626  ; CHECK-DAG: st.w [[R3]], 0($4)
627
628  ret void
629}
630
; Mask <1, 1> places doubleword 1 of %a in both lanes; %b is loaded but no mask
; index refers to it. Expected to be selected as splati.d with element index 1
; rather than ilvod.d.
631define void @ilvod_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
632  ; CHECK-LABEL: ilvod_v2i64_1:
633
634  %1 = load <2 x i64>, <2 x i64>* %a
635  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
636  %2 = load <2 x i64>, <2 x i64>* %b
637  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
638  ; ilvod.d with two identical operands is equivalent to splati.d
639  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
640  store <2 x i64> %3, <2 x i64>* %c
641  ; CHECK-DAG: st.d [[R3]], 0($4)
642
643  ret void
644}
645
; Duplicates each odd-indexed byte of %b (mask 17, 17, 19, 19, ...); %a is
; loaded but no mask index refers to it. Expected to be selected as ilvod.b
; with the %b vector as both source operands.
646define void @ilvod_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
647  ; CHECK-LABEL: ilvod_v16i8_2:
648
649  %1 = load <16 x i8>, <16 x i8>* %a
650  %2 = load <16 x i8>, <16 x i8>* %b
651  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
652  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
653                     <16 x i32> <i32 17, i32 17, i32 19, i32 19, i32 21, i32 21, i32 23, i32 23, i32 25, i32 25, i32 27, i32 27, i32 29, i32 29, i32 31, i32 31>
654  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
655  store <16 x i8> %3, <16 x i8>* %c
656  ; CHECK-DAG: st.b [[R3]], 0($4)
657
658  ret void
659}
660
; Duplicates each odd-indexed halfword of %b (mask 9, 9, 11, 11, ...); %a is
; loaded but no mask index refers to it. Expected to be selected as ilvod.h
; with the %b vector as both source operands.
661define void @ilvod_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
662  ; CHECK-LABEL: ilvod_v8i16_2:
663
664  %1 = load <8 x i16>, <8 x i16>* %a
665  %2 = load <8 x i16>, <8 x i16>* %b
666  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
667  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
668  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
669  store <8 x i16> %3, <8 x i16>* %c
670  ; CHECK-DAG: st.h [[R3]], 0($4)
671
672  ret void
673}
674
; Duplicates each odd-indexed word of %b (mask <5, 5, 7, 7>); %a is loaded but
; no mask index refers to it. Expected to be selected as ilvod.w with the %b
; vector as both source operands.
675define void @ilvod_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
676  ; CHECK-LABEL: ilvod_v4i32_2:
677
678  %1 = load <4 x i32>, <4 x i32>* %a
679  %2 = load <4 x i32>, <4 x i32>* %b
680  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
681  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 5, i32 5, i32 7, i32 7>
682  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
683  store <4 x i32> %3, <4 x i32>* %c
684  ; CHECK-DAG: st.w [[R3]], 0($4)
685
686  ret void
687}
688
; Mask <3, 3> places doubleword 1 of %b in both lanes; %a is loaded but no mask
; index refers to it. Expected to be selected as splati.d with element index 1
; rather than ilvod.d.
689define void @ilvod_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
690  ; CHECK-LABEL: ilvod_v2i64_2:
691
692  %1 = load <2 x i64>, <2 x i64>* %a
693  %2 = load <2 x i64>, <2 x i64>* %b
694  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
695  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
696  ; ilvod.d with two identical operands is equivalent to splati.d
697  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
698  store <2 x i64> %3, <2 x i64>* %c
699  ; CHECK-DAG: st.d [[R3]], 0($4)
700
701  ret void
702}
703
; Interleaves bytes 0..7 of %a with bytes 0..7 of %b (mask 0, 16, 1, 17, ...).
; Expected to be selected as ilvr.b with the %b vector listed before the %a
; vector.
704define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
705  ; CHECK-LABEL: ilvr_v16i8_0:
706
707  %1 = load <16 x i8>, <16 x i8>* %a
708  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
709  %2 = load <16 x i8>, <16 x i8>* %b
710  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
711  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
712                     <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
713  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
714  store <16 x i8> %3, <16 x i8>* %c
715  ; CHECK-DAG: st.b [[R3]], 0($4)
716
717  ret void
718}
719
; Interleaves halfwords 0..3 of %a with halfwords 0..3 of %b (mask 0, 8, 1, 9,
; ...). Expected to be selected as ilvr.h with the %b vector listed before the
; %a vector.
720define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
721  ; CHECK-LABEL: ilvr_v8i16_0:
722
723  %1 = load <8 x i16>, <8 x i16>* %a
724  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
725  %2 = load <8 x i16>, <8 x i16>* %b
726  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
727  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
728  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
729  store <8 x i16> %3, <8 x i16>* %c
730  ; CHECK-DAG: st.h [[R3]], 0($4)
731
732  ret void
733}
734
; Interleaves words 0..1 of %a with words 0..1 of %b (mask <0, 4, 1, 5>).
; Expected to be selected as ilvr.w with the %b vector listed before the %a
; vector.
735define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
736  ; CHECK-LABEL: ilvr_v4i32_0:
737
738  %1 = load <4 x i32>, <4 x i32>* %a
739  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
740  %2 = load <4 x i32>, <4 x i32>* %b
741  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
742  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
743  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
744  store <4 x i32> %3, <4 x i32>* %c
745  ; CHECK-DAG: st.w [[R3]], 0($4)
746
747  ret void
748}
749
; Combines doubleword 0 of %a with doubleword 0 of %b (mask <0, 2>), the same
; mask as ilvev_v2i64_0. Expected to be selected as ilvev.d (not ilvr.d) with
; the %b vector listed before the %a vector.
750define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
751  ; CHECK-LABEL: ilvr_v2i64_0:
752
753  %1 = load <2 x i64>, <2 x i64>* %a
754  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
755  %2 = load <2 x i64>, <2 x i64>* %b
756  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
757  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
758  ; ilvr.d and ilvev.d are equivalent for v2i64
759  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
760  store <2 x i64> %3, <2 x i64>* %c
761  ; CHECK-DAG: st.d [[R3]], 0($4)
762
763  ret void
764}
765
; Duplicates each of bytes 0..7 of %b (mask 16, 16, 17, 17, ...); %a is loaded
; but no mask index refers to it. Expected to be selected as ilvr.b with the
; %b vector as both source operands.
766define void @ilvr_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
767  ; CHECK-LABEL: ilvr_v16i8_1:
768
769  %1 = load <16 x i8>, <16 x i8>* %a
770  %2 = load <16 x i8>, <16 x i8>* %b
771  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
772  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
773                     <16 x i32> <i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23>
774  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
775  store <16 x i8> %3, <16 x i8>* %c
776  ; CHECK-DAG: st.b [[R3]], 0($4)
777
778  ret void
779}
780
; Duplicates each of halfwords 0..3 of %b (mask 8, 8, 9, 9, ...); %a is loaded
; but no mask index refers to it. Expected to be selected as ilvr.h with the
; %b vector as both source operands.
781define void @ilvr_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
782  ; CHECK-LABEL: ilvr_v8i16_1:
783
784  %1 = load <8 x i16>, <8 x i16>* %a
785  %2 = load <8 x i16>, <8 x i16>* %b
786  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
787  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11>
788  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
789  store <8 x i16> %3, <8 x i16>* %c
790  ; CHECK-DAG: st.h [[R3]], 0($4)
791
792  ret void
793}
794
; Duplicates each of words 0..1 of %b (mask <4, 4, 5, 5>); %a is loaded but no
; mask index refers to it. Expected to be selected as ilvr.w with the %b vector
; as both source operands.
795define void @ilvr_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
796  ; CHECK-LABEL: ilvr_v4i32_1:
797
798  %1 = load <4 x i32>, <4 x i32>* %a
799  %2 = load <4 x i32>, <4 x i32>* %b
800  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
801  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 4, i32 5, i32 5>
802  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
803  store <4 x i32> %3, <4 x i32>* %c
804  ; CHECK-DAG: st.w [[R3]], 0($4)
805
806  ret void
807}
808
; Mask <2, 2> places doubleword 0 of %b in both lanes; %a is loaded but no mask
; index refers to it. Expected to be selected as splati.d with element index 0
; rather than ilvr.d.
809define void @ilvr_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
810  ; CHECK-LABEL: ilvr_v2i64_1:
811
812  %1 = load <2 x i64>, <2 x i64>* %a
813  %2 = load <2 x i64>, <2 x i64>* %b
814  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
815  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
816  ; ilvr.d and splati.d are equivalent for v2i64
817  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
818  store <2 x i64> %3, <2 x i64>* %c
819  ; CHECK-DAG: st.d [[R3]], 0($4)
820
821  ret void
822}
823
; Duplicates each of bytes 0..7 of %a (mask 0, 0, 1, 1, ...); %b is loaded but
; no mask index refers to it. Expected to be selected as ilvr.b with the %a
; vector as both source operands.
824define void @ilvr_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
825  ; CHECK-LABEL: ilvr_v16i8_2:
826
827  %1 = load <16 x i8>, <16 x i8>* %a
828  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
829  %2 = load <16 x i8>, <16 x i8>* %b
830  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
831                     <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
832  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
833  store <16 x i8> %3, <16 x i8>* %c
834  ; CHECK-DAG: st.b [[R3]], 0($4)
835
836  ret void
837}
838
; Mask duplicates elements 0-3 of the first operand (%a); the test expects
; an ilvr.h whose two sources are both the vector loaded from %a.
define void @ilvr_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v8i16_2:

  %va = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %vb = load <8 x i16>, <8 x i16>* %b
  %res = shufflevector <8 x i16> %va, <8 x i16> %vb,
                       <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <8 x i16> %res, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}
852
; Mask duplicates elements 0-1 of the first operand (%a); the test expects
; an ilvr.w whose two sources are both the vector loaded from %a.
define void @ilvr_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v4i32_2:

  %va = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %vb = load <4 x i32>, <4 x i32>* %b
  %res = shufflevector <4 x i32> %va, <4 x i32> %vb,
                       <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x i32> %res, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}
866
; Mask repeats element 0 of the first operand (%a) in both lanes.
define void @ilvr_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v2i64_2:

  %va = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %vb = load <2 x i64>, <2 x i64>* %b
  %res = shufflevector <2 x i64> %va, <2 x i64> %vb,
                       <2 x i32> <i32 0, i32 0>
  ; With only two lanes, ilvr.d and splati.d give the same result, so
  ; splati.d is what gets matched here.
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
  store <2 x i64> %res, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}
881
; Mask alternates elements 8-15 of %a with elements 8-15 of %b
; (indices 24-31); the test expects a two-input ilvl.b.
define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v16i8_0:

  %va = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %vb = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <16 x i8> %va, <16 x i8> %vb,
                       <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27,
                                   i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <16 x i8> %res, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}
897
; Mask alternates elements 4-7 of %a with elements 4-7 of %b
; (indices 12-15); the test expects a two-input ilvl.h.
define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v8i16_0:

  %va = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %vb = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <8 x i16> %va, <8 x i16> %vb,
                       <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <8 x i16> %res, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}
912
; Mask alternates elements 2-3 of %a with elements 2-3 of %b
; (indices 6-7); the test expects a two-input ilvl.w.
define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v4i32_0:

  %va = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %vb = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <4 x i32> %va, <4 x i32> %vb,
                       <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <4 x i32> %res, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}
927
; Mask picks element 1 of %a followed by element 1 of %b (index 3).
define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v2i64_0:

  %va = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %vb = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <2 x i64> %va, <2 x i64> %vb,
                       <2 x i32> <i32 1, i32 3>
  ; With only two lanes, ilvl.d and ilvod.d give the same result, so
  ; ilvod.d is what gets matched here.
  ; NOTE(review): the sources are listed as [[R1]], [[R2]] here, while the
  ; ilvl.b/.h/.w two-input tests in this file list [[R2]], [[R1]] -- confirm
  ; against actual llc output.
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %res, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}
943
; Mask duplicates elements 8-15 of the second operand (indices 24-31); the
; test expects an ilvl.b whose two sources are both the vector loaded from %b.
define void @ilvl_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v16i8_1:

  %va = load <16 x i8>, <16 x i8>* %a
  %vb = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <16 x i8> %va, <16 x i8> %vb,
                       <16 x i32> <i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27,
                                   i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <16 x i8> %res, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}
958
; Mask duplicates elements 4-7 of the second operand (indices 12-15); the
; test expects an ilvl.h whose two sources are both the vector loaded from %b.
define void @ilvl_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v8i16_1:

  %va = load <8 x i16>, <8 x i16>* %a
  %vb = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <8 x i16> %va, <8 x i16> %vb,
                       <8 x i32> <i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <8 x i16> %res, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}
972
; Mask duplicates elements 2-3 of the second operand (indices 6-7); the
; test expects an ilvl.w whose two sources are both the vector loaded from %b.
define void @ilvl_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v4i32_1:

  %va = load <4 x i32>, <4 x i32>* %a
  %vb = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <4 x i32> %va, <4 x i32> %vb,
                       <4 x i32> <i32 6, i32 6, i32 7, i32 7>
  ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <4 x i32> %res, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}
986
; Mask repeats element 1 of the second operand (index 3) in both lanes.
define void @ilvl_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v2i64_1:

  %va = load <2 x i64>, <2 x i64>* %a
  %vb = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <2 x i64> %va, <2 x i64> %vb,
                       <2 x i32> <i32 3, i32 3>
  ; With only two lanes, ilvl.d and splati.d give the same result, so
  ; splati.d is what gets matched here.
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
  store <2 x i64> %res, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}
1001
; Mask duplicates elements 8-15 of the first operand (%a); the test expects
; an ilvl.b whose two sources are both the vector loaded from %a.
define void @ilvl_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v16i8_2:

  %va = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %vb = load <16 x i8>, <16 x i8>* %b
  %res = shufflevector <16 x i8> %va, <16 x i8> %vb,
                       <16 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11,
                                   i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <16 x i8> %res, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}
1016
; Mask duplicates elements 4-7 of the first operand (%a); the test expects
; an ilvl.h whose two sources are both the vector loaded from %a.
define void @ilvl_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v8i16_2:

  %va = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %vb = load <8 x i16>, <8 x i16>* %b
  %res = shufflevector <8 x i16> %va, <8 x i16> %vb,
                       <8 x i32> <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <8 x i16> %res, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}
1030
; Mask duplicates elements 2-3 of the first operand (%a); the test expects
; an ilvl.w whose two sources are both the vector loaded from %a.
define void @ilvl_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v4i32_2:

  %va = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %vb = load <4 x i32>, <4 x i32>* %b
  %res = shufflevector <4 x i32> %va, <4 x i32> %vb,
                       <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x i32> %res, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}
1044
; Mask repeats element 1 of the first operand (%a) in both lanes.
define void @ilvl_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v2i64_2:

  %va = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %vb = load <2 x i64>, <2 x i64>* %b
  %res = shufflevector <2 x i64> %va, <2 x i64> %vb,
                       <2 x i32> <i32 1, i32 1>
  ; With only two lanes, ilvl.d and splati.d give the same result, so
  ; splati.d is what gets matched here.
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %res, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}
1059
; Mask takes the even-indexed elements of %a followed by the even-indexed
; elements of %b (indices 16-30); the test expects a two-input pckev.b.
define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckev_v16i8_0:

  %va = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %vb = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <16 x i8> %va, <16 x i8> %vb,
                       <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14,
                                   i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <16 x i8> %res, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}
1075
; Mask takes the even-indexed elements of %a followed by the even-indexed
; elements of %b (indices 8-14); the test expects a two-input pckev.h.
define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckev_v8i16_0:

  %va = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %vb = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <8 x i16> %va, <8 x i16> %vb,
                       <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <8 x i16> %res, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}
1090
; Mask takes the even-indexed elements of %a followed by the even-indexed
; elements of %b (indices 4 and 6); the test expects a two-input pckev.w.
define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckev_v4i32_0:

  %va = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %vb = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <4 x i32> %va, <4 x i32> %vb,
                       <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <4 x i32> %res, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}
1105
; Mask picks element 0 of %a followed by element 0 of %b (index 2).
define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckev_v2i64_0:

  %va = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %vb = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <2 x i64> %va, <2 x i64> %vb,
                       <2 x i32> <i32 0, i32 2>
  ; With only two lanes, pckev.d and ilvev.d give the same result, so
  ; ilvev.d is what gets matched here.
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <2 x i64> %res, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}
1121
; Mask lists the even-indexed elements of the second operand (indices 16-30)
; twice; the test expects a pckev.b with the vector from %b as both sources.
define void @pckev_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckev_v16i8_1:

  %va = load <16 x i8>, <16 x i8>* %a
  %vb = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <16 x i8> %va, <16 x i8> %vb,
                       <16 x i32> <i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30,
                                   i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <16 x i8> %res, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}
1136
; Mask lists the even-indexed elements of the second operand (indices 8-14)
; twice; the test expects a pckev.h with the vector from %b as both sources.
define void @pckev_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckev_v8i16_1:

  %va = load <8 x i16>, <8 x i16>* %a
  %vb = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <8 x i16> %va, <8 x i16> %vb,
                       <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 8, i32 10, i32 12, i32 14>
  ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <8 x i16> %res, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}
1150
; Mask lists the even-indexed elements of the second operand (indices 4, 6)
; twice; the test expects a pckev.w with the vector from %b as both sources.
define void @pckev_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckev_v4i32_1:

  %va = load <4 x i32>, <4 x i32>* %a
  %vb = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <4 x i32> %va, <4 x i32> %vb,
                       <4 x i32> <i32 4, i32 6, i32 4, i32 6>
  ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <4 x i32> %res, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}
1164
; Mask repeats element 0 of the second operand (index 2) in both lanes.
define void @pckev_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckev_v2i64_1:

  %va = load <2 x i64>, <2 x i64>* %a
  %vb = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <2 x i64> %va, <2 x i64> %vb,
                       <2 x i32> <i32 2, i32 2>
  ; With only two lanes, pckev.d and splati.d give the same result, so
  ; splati.d is what gets matched here.
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
  store <2 x i64> %res, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}
1179
; Mask lists the even-indexed elements of the first operand (%a) twice; the
; test expects a pckev.b with the vector from %a as both sources.
define void @pckev_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckev_v16i8_2:

  %va = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %vb = load <16 x i8>, <16 x i8>* %b
  %res = shufflevector <16 x i8> %va, <16 x i8> %vb,
                       <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14,
                                   i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <16 x i8> %res, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}
1194
; Mask lists the even-indexed elements of the first operand (%a) twice; the
; test expects a pckev.h with the vector from %a as both sources.
define void @pckev_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckev_v8i16_2:

  %va = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %vb = load <8 x i16>, <8 x i16>* %b
  %res = shufflevector <8 x i16> %va, <8 x i16> %vb,
                       <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 0, i32 2, i32 4, i32 6>
  ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <8 x i16> %res, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}
1208
; Mask lists the even-indexed elements of the first operand (%a) twice; the
; test expects a pckev.w with the vector from %a as both sources.
define void @pckev_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckev_v4i32_2:

  %va = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %vb = load <4 x i32>, <4 x i32>* %b
  %res = shufflevector <4 x i32> %va, <4 x i32> %vb,
                       <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x i32> %res, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}
1222
; Mask repeats element 0 of the first operand (%a) in both lanes.
define void @pckev_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckev_v2i64_2:

  %va = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %vb = load <2 x i64>, <2 x i64>* %b
  %res = shufflevector <2 x i64> %va, <2 x i64> %vb,
                       <2 x i32> <i32 0, i32 0>
  ; With only two lanes, pckev.d and splati.d give the same result, so
  ; splati.d is what gets matched here.
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
  store <2 x i64> %res, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}
1237
; Mask takes the odd-indexed elements of %a followed by the odd-indexed
; elements of %b (indices 17-31); the test expects a two-input pckod.b.
define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckod_v16i8_0:

  %va = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %vb = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <16 x i8> %va, <16 x i8> %vb,
                       <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15,
                                   i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <16 x i8> %res, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}
1253
; Mask takes the odd-indexed elements of %a followed by the odd-indexed
; elements of %b (indices 9-15); the test expects a two-input pckod.h.
define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckod_v8i16_0:

  %va = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %vb = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <8 x i16> %va, <8 x i16> %vb,
                       <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <8 x i16> %res, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}
1268
; Mask takes the odd-indexed elements of %a followed by the odd-indexed
; elements of %b (indices 5 and 7); the test expects a two-input pckod.w.
define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckod_v4i32_0:

  %va = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %vb = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <4 x i32> %va, <4 x i32> %vb,
                       <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <4 x i32> %res, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}
1283
; Mask picks element 1 of %a followed by element 1 of %b (index 3).
define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckod_v2i64_0:

  %va = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %vb = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <2 x i64> %va, <2 x i64> %vb,
                       <2 x i32> <i32 1, i32 3>
  ; With only two lanes, pckod.d and ilvod.d give the same result, so
  ; ilvod.d is what gets matched here.
  ; NOTE(review): the sources are listed as [[R1]], [[R2]] here, while the
  ; pckod.b/.h/.w two-input tests in this file list [[R2]], [[R1]] -- confirm
  ; against actual llc output.
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %res, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}
1299
; Mask lists the odd-indexed elements of the second operand (indices 17-31)
; twice; the test expects a pckod.b with the vector from %b as both sources.
define void @pckod_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckod_v16i8_1:

  %va = load <16 x i8>, <16 x i8>* %a
  %vb = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <16 x i8> %va, <16 x i8> %vb,
                       <16 x i32> <i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31,
                                   i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <16 x i8> %res, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}
1314
; Mask lists the odd-indexed elements of the second operand (indices 9-15)
; twice; the test expects a pckod.h with the vector from %b as both sources.
define void @pckod_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckod_v8i16_1:

  %va = load <8 x i16>, <8 x i16>* %a
  %vb = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <8 x i16> %va, <8 x i16> %vb,
                       <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 9, i32 11, i32 13, i32 15>
  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <8 x i16> %res, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}
1328
; Mask lists the odd-indexed elements of the second operand (indices 5, 7)
; twice; the test expects a pckod.w with the vector from %b as both sources.
define void @pckod_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckod_v4i32_1:

  %va = load <4 x i32>, <4 x i32>* %a
  %vb = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <4 x i32> %va, <4 x i32> %vb,
                       <4 x i32> <i32 5, i32 7, i32 5, i32 7>
  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <4 x i32> %res, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}
1342
; Mask repeats element 1 of the second operand (index 3) in both lanes.
define void @pckod_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckod_v2i64_1:

  %va = load <2 x i64>, <2 x i64>* %a
  %vb = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %res = shufflevector <2 x i64> %va, <2 x i64> %vb,
                       <2 x i32> <i32 3, i32 3>
  ; With only two lanes, pckod.d and splati.d give the same result, so
  ; splati.d is what gets matched here.
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
  store <2 x i64> %res, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}
1357
; Mask lists the odd-indexed elements of the first operand (%a) twice; the
; test expects a pckod.b with the vector from %a as both sources.
define void @pckod_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckod_v16i8_2:

  %va = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %vb = load <16 x i8>, <16 x i8>* %b
  %res = shufflevector <16 x i8> %va, <16 x i8> %vb,
                       <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15,
                                   i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <16 x i8> %res, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}
1372
; Mask lists the odd-indexed elements of the first operand (%a) twice; the
; test expects a pckod.h with the vector from %a as both sources.
define void @pckod_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckod_v8i16_2:

  %va = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %vb = load <8 x i16>, <8 x i16>* %b
  %res = shufflevector <8 x i16> %va, <8 x i16> %vb,
                       <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 1, i32 3, i32 5, i32 7>
  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <8 x i16> %res, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}
1386
; Mask lists the odd-indexed elements of the first operand (%a) twice; the
; test expects a pckod.w with the vector from %a as both sources.
define void @pckod_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckod_v4i32_2:

  %va = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %vb = load <4 x i32>, <4 x i32>* %b
  %res = shufflevector <4 x i32> %va, <4 x i32> %vb,
                       <4 x i32> <i32 1, i32 3, i32 1, i32 3>
  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x i32> %res, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}
1400
; Mask repeats element 1 of the first operand (%a) in both lanes.
define void @pckod_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckod_v2i64_2:

  %va = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %vb = load <2 x i64>, <2 x i64>* %b
  %res = shufflevector <2 x i64> %va, <2 x i64> %vb,
                       <2 x i32> <i32 1, i32 1>
  ; With only two lanes, pckod.d and splati.d give the same result, so
  ; splati.d is what gets matched here.
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %res, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}
1415
; Single-input shuffle that broadcasts element 4 of %a to all sixteen lanes;
; the test expects splati.b with immediate index 4.
define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: splati_v16i8_0:

  %va = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %res = shufflevector <16 x i8> %va, <16 x i8> undef,
                       <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4,
                                   i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
  store <16 x i8> %res, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}
1429
; Single-input shuffle that broadcasts element 4 of %a to all eight lanes;
; the test expects splati.h with immediate index 4.
define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: splati_v8i16_0:

  %va = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %res = shufflevector <8 x i16> %va, <8 x i16> undef,
                       <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
  store <8 x i16> %res, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}
1442
; Single-input shuffle that broadcasts element 3 of %a to all four lanes;
; the test expects splati.w with immediate index 3.
define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: splati_v4i32_0:

  %va = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %res = shufflevector <4 x i32> %va, <4 x i32> undef,
                       <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][3]
  store <4 x i32> %res, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}
1455
; Single-input shuffle that broadcasts element 1 of %a to both lanes;
; the test expects splati.d with immediate index 1.
define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: splati_v2i64_0:

  %va = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %res = shufflevector <2 x i64> %va, <2 x i64> undef,
                       <2 x i32> <i32 1, i32 1>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %res, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}
1468