1; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
2; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
3
; Bitwise AND of two <16 x i8> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.b loads from 0($5) and 0($6), one and.v on the
; loaded registers, and an st.b of its result to 0($4).
4define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
5  ; CHECK: and_v16i8:
6
7  %1 = load <16 x i8>* %a
8  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
9  %2 = load <16 x i8>* %b
10  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
11  %3 = and <16 x i8> %1, %2
12  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
13  store <16 x i8> %3, <16 x i8>* %c
14  ; CHECK-DAG: st.b [[R3]], 0($4)
15
16  ret void
17  ; CHECK: .size and_v16i8
18}
19
; Bitwise AND of two <8 x i16> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.h loads, the element-size-agnostic and.v, and
; an st.h of its result to 0($4).
20define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
21  ; CHECK: and_v8i16:
22
23  %1 = load <8 x i16>* %a
24  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
25  %2 = load <8 x i16>* %b
26  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
27  %3 = and <8 x i16> %1, %2
28  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
29  store <8 x i16> %3, <8 x i16>* %c
30  ; CHECK-DAG: st.h [[R3]], 0($4)
31
32  ret void
33  ; CHECK: .size and_v8i16
34}
35
; Bitwise AND of two <4 x i32> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.w loads, one and.v, and an st.w of its result
; to 0($4).
36define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
37  ; CHECK: and_v4i32:
38
39  %1 = load <4 x i32>* %a
40  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
41  %2 = load <4 x i32>* %b
42  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
43  %3 = and <4 x i32> %1, %2
44  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
45  store <4 x i32> %3, <4 x i32>* %c
46  ; CHECK-DAG: st.w [[R3]], 0($4)
47
48  ret void
49  ; CHECK: .size and_v4i32
50}
51
; Bitwise AND of two <2 x i64> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.d loads, one and.v, and an st.d of its result
; to 0($4).
52define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
53  ; CHECK: and_v2i64:
54
55  %1 = load <2 x i64>* %a
56  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
57  %2 = load <2 x i64>* %b
58  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
59  %3 = and <2 x i64> %1, %2
60  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
61  store <2 x i64> %3, <2 x i64>* %c
62  ; CHECK-DAG: st.d [[R3]], 0($4)
63
64  ret void
65  ; CHECK: .size and_v2i64
66}
67
; Bitwise AND of a <16 x i8> vector loaded from %a with a splat of 1; result
; stored to %c. The FileCheck lines expect the constant to appear as the
; immediate operand of andi.b.
68define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
69  ; CHECK: and_v16i8_i:
70
71  %1 = load <16 x i8>* %a
72  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
73  %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
74  ; CHECK-DAG: andi.b [[R4:\$w[0-9]+]], [[R1]], 1
75  store <16 x i8> %2, <16 x i8>* %c
76  ; CHECK-DAG: st.b [[R4]], 0($4)
77
78  ret void
79  ; CHECK: .size and_v16i8_i
80}
81
; Bitwise AND of a <8 x i16> vector loaded from %a with a splat of 1; result
; stored to %c. The FileCheck lines expect the splat to be built with ldi.h and
; combined with the loaded vector by the register-form and.v.
82define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
83  ; CHECK: and_v8i16_i:
84
85  %1 = load <8 x i16>* %a
86  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
87  %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
88  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
89  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
90  store <8 x i16> %2, <8 x i16>* %c
91  ; CHECK-DAG: st.h [[R4]], 0($4)
92
93  ret void
94  ; CHECK: .size and_v8i16_i
95}
96
; Bitwise AND of a <4 x i32> vector loaded from %a with a splat of 1; result
; stored to %c. The FileCheck lines expect the splat to be built with ldi.w and
; combined with the loaded vector by the register-form and.v.
97define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
98  ; CHECK: and_v4i32_i:
99
100  %1 = load <4 x i32>* %a
101  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
102  %2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
103  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
104  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
105  store <4 x i32> %2, <4 x i32>* %c
106  ; CHECK-DAG: st.w [[R4]], 0($4)
107
108  ret void
109  ; CHECK: .size and_v4i32_i
110}
111
; Bitwise AND of a <2 x i64> vector loaded from %a with a splat of 1; result
; stored to %c. The FileCheck lines expect the splat to be built with ldi.d and
; combined with the loaded vector by the register-form and.v.
112define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
113  ; CHECK: and_v2i64_i:
114
115  %1 = load <2 x i64>* %a
116  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
117  %2 = and <2 x i64> %1, <i64 1, i64 1>
118  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
119  ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
120  store <2 x i64> %2, <2 x i64>* %c
121  ; CHECK-DAG: st.d [[R4]], 0($4)
122
123  ret void
124  ; CHECK: .size and_v2i64_i
125}
126
; Bitwise OR of two <16 x i8> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.b loads from 0($5) and 0($6), one or.v on the
; loaded registers, and an st.b of its result to 0($4).
127define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
128  ; CHECK: or_v16i8:
129
130  %1 = load <16 x i8>* %a
131  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
132  %2 = load <16 x i8>* %b
133  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
134  %3 = or <16 x i8> %1, %2
135  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
136  store <16 x i8> %3, <16 x i8>* %c
137  ; CHECK-DAG: st.b [[R3]], 0($4)
138
139  ret void
140  ; CHECK: .size or_v16i8
141}
142
; Bitwise OR of two <8 x i16> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.h loads, one or.v, and an st.h of its result
; to 0($4).
143define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
144  ; CHECK: or_v8i16:
145
146  %1 = load <8 x i16>* %a
147  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
148  %2 = load <8 x i16>* %b
149  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
150  %3 = or <8 x i16> %1, %2
151  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
152  store <8 x i16> %3, <8 x i16>* %c
153  ; CHECK-DAG: st.h [[R3]], 0($4)
154
155  ret void
156  ; CHECK: .size or_v8i16
157}
158
; Bitwise OR of two <4 x i32> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.w loads, one or.v, and an st.w of its result
; to 0($4).
159define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
160  ; CHECK: or_v4i32:
161
162  %1 = load <4 x i32>* %a
163  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
164  %2 = load <4 x i32>* %b
165  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
166  %3 = or <4 x i32> %1, %2
167  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
168  store <4 x i32> %3, <4 x i32>* %c
169  ; CHECK-DAG: st.w [[R3]], 0($4)
170
171  ret void
172  ; CHECK: .size or_v4i32
173}
174
; Bitwise OR of two <2 x i64> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.d loads, one or.v, and an st.d of its result
; to 0($4).
175define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
176  ; CHECK: or_v2i64:
177
178  %1 = load <2 x i64>* %a
179  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
180  %2 = load <2 x i64>* %b
181  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
182  %3 = or <2 x i64> %1, %2
183  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
184  store <2 x i64> %3, <2 x i64>* %c
185  ; CHECK-DAG: st.d [[R3]], 0($4)
186
187  ret void
188  ; CHECK: .size or_v2i64
189}
190
; Bitwise OR of a <16 x i8> vector loaded from %a with a splat of 3; result
; stored to %c. The FileCheck lines expect the constant to appear as the
; immediate operand of ori.b.
191define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
192  ; CHECK: or_v16i8_i:
193
194  %1 = load <16 x i8>* %a
195  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
196  %2 = or <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
197  ; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 3
198  store <16 x i8> %2, <16 x i8>* %c
199  ; CHECK-DAG: st.b [[R4]], 0($4)
200
201  ret void
202  ; CHECK: .size or_v16i8_i
203}
204
; Bitwise OR of a <8 x i16> vector loaded from %a with a splat of 3; result
; stored to %c. The FileCheck lines expect the splat to be built with ldi.h and
; combined with the loaded vector by the register-form or.v.
205define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
206  ; CHECK: or_v8i16_i:
207
208  %1 = load <8 x i16>* %a
209  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
210  %2 = or <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
211  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
212  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
213  store <8 x i16> %2, <8 x i16>* %c
214  ; CHECK-DAG: st.h [[R4]], 0($4)
215
216  ret void
217  ; CHECK: .size or_v8i16_i
218}
219
; Bitwise OR of a <4 x i32> vector loaded from %a with a splat of 3; result
; stored to %c. The FileCheck lines expect the splat to be built with ldi.w and
; combined with the loaded vector by the register-form or.v.
220define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
221  ; CHECK: or_v4i32_i:
222
223  %1 = load <4 x i32>* %a
224  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
225  %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
226  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
227  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
228  store <4 x i32> %2, <4 x i32>* %c
229  ; CHECK-DAG: st.w [[R4]], 0($4)
230
231  ret void
232  ; CHECK: .size or_v4i32_i
233}
234
; Bitwise OR of a <2 x i64> vector loaded from %a with a splat of 3; result
; stored to %c. The FileCheck lines expect the splat to be built with ldi.d and
; combined with the loaded vector by the register-form or.v.
235define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
236  ; CHECK: or_v2i64_i:
237
238  %1 = load <2 x i64>* %a
239  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
240  %2 = or <2 x i64> %1, <i64 3, i64 3>
241  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
242  ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
243  store <2 x i64> %2, <2 x i64>* %c
244  ; CHECK-DAG: st.d [[R4]], 0($4)
245
246  ret void
247  ; CHECK: .size or_v2i64_i
248}
249
; NOR of two <16 x i8> vectors loaded from %a and %b, written in IR as an OR
; followed by an XOR with all-ones; result stored to %c.
; The FileCheck lines expect the or/xor pair to become a single nor.v whose
; result is stored with st.b to 0($4).
250define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
251  ; CHECK: nor_v16i8:
252
253  %1 = load <16 x i8>* %a
254  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
255  %2 = load <16 x i8>* %b
256  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
257  %3 = or <16 x i8> %1, %2
258  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
259  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
260  store <16 x i8> %4, <16 x i8>* %c
261  ; CHECK-DAG: st.b [[R3]], 0($4)
262
263  ret void
264  ; CHECK: .size nor_v16i8
265}
266
; NOR of two <8 x i16> vectors loaded from %a and %b, written in IR as an OR
; followed by an XOR with all-ones; result stored to %c.
; The FileCheck lines expect a single nor.v whose result is stored with st.h.
267define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
268  ; CHECK: nor_v8i16:
269
270  %1 = load <8 x i16>* %a
271  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
272  %2 = load <8 x i16>* %b
273  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
274  %3 = or <8 x i16> %1, %2
275  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
276  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
277  store <8 x i16> %4, <8 x i16>* %c
278  ; CHECK-DAG: st.h [[R3]], 0($4)
279
280  ret void
281  ; CHECK: .size nor_v8i16
282}
283
; NOR of two <4 x i32> vectors loaded from %a and %b, written in IR as an OR
; followed by an XOR with all-ones; result stored to %c.
; The FileCheck lines expect a single nor.v whose result is stored with st.w.
284define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
285  ; CHECK: nor_v4i32:
286
287  %1 = load <4 x i32>* %a
288  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
289  %2 = load <4 x i32>* %b
290  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
291  %3 = or <4 x i32> %1, %2
292  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
293  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
294  store <4 x i32> %4, <4 x i32>* %c
295  ; CHECK-DAG: st.w [[R3]], 0($4)
296
297  ret void
298  ; CHECK: .size nor_v4i32
299}
300
; NOR of two <2 x i64> vectors loaded from %a and %b, written in IR as an OR
; followed by an XOR with all-ones; result stored to %c.
; The FileCheck lines expect a single nor.v whose result is stored with st.d.
301define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
302  ; CHECK: nor_v2i64:
303
304  %1 = load <2 x i64>* %a
305  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
306  %2 = load <2 x i64>* %b
307  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
308  %3 = or <2 x i64> %1, %2
309  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
310  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
311  store <2 x i64> %4, <2 x i64>* %c
312  ; CHECK-DAG: st.d [[R3]], 0($4)
313
314  ret void
315  ; CHECK: .size nor_v2i64
316}
317
; NOR of a <16 x i8> vector loaded from %a with a splat of 1, written in IR as
; an OR followed by an XOR with all-ones; result stored to %c.
; The stored register has to come from nori.b: a plain ori.b result would be
; (a | 1) rather than ~(a | 1). FileCheck matches substrings, so a pattern of
; just "ori.b" would also match inside "nori.b" and could not tell the two
; apart; the full mnemonic is therefore spelled out below.
318define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
319  ; CHECK: nor_v16i8_i:
320
321  %1 = load <16 x i8>* %a
322  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
323  %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
324  %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
325  ; CHECK-DAG: nori.b [[R4:\$w[0-9]+]], [[R1]], 1
326  store <16 x i8> %3, <16 x i8>* %c
327  ; CHECK-DAG: st.b [[R4]], 0($4)
328
329  ret void
330  ; CHECK: .size nor_v16i8_i
331}
332
; NOR of a <8 x i16> vector loaded from %a with a splat of 1 (OR followed by
; XOR with all-ones in IR); result stored to %c.
; The FileCheck lines expect the splat to be built with ldi.h and combined
; with the loaded vector by the register-form nor.v.
333define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
334  ; CHECK: nor_v8i16_i:
335
336  %1 = load <8 x i16>* %a
337  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
338  %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
339  %3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
340  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
341  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
342  store <8 x i16> %3, <8 x i16>* %c
343  ; CHECK-DAG: st.h [[R4]], 0($4)
344
345  ret void
346  ; CHECK: .size nor_v8i16_i
347}
348
; NOR of a <4 x i32> vector loaded from %a with a splat of 1 (OR followed by
; XOR with all-ones in IR); result stored to %c.
; The FileCheck lines expect the splat to be built with ldi.w and combined
; with the loaded vector by the register-form nor.v.
349define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
350  ; CHECK: nor_v4i32_i:
351
352  %1 = load <4 x i32>* %a
353  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
354  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
355  %3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
356  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
357  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
358  store <4 x i32> %3, <4 x i32>* %c
359  ; CHECK-DAG: st.w [[R4]], 0($4)
360
361  ret void
362  ; CHECK: .size nor_v4i32_i
363}
364
; NOR of a <2 x i64> vector loaded from %a with a splat of 1 (OR followed by
; XOR with all-ones in IR); result stored to %c.
; The FileCheck lines expect the splat to be built with ldi.d and combined
; with the loaded vector by the register-form nor.v.
365define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
366  ; CHECK: nor_v2i64_i:
367
368  %1 = load <2 x i64>* %a
369  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
370  %2 = or <2 x i64> %1, <i64 1, i64 1>
371  %3 = xor <2 x i64> %2, <i64 -1, i64 -1>
372  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
373  ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
374  store <2 x i64> %3, <2 x i64>* %c
375  ; CHECK-DAG: st.d [[R4]], 0($4)
376
377  ret void
378  ; CHECK: .size nor_v2i64_i
379}
380
; Bitwise XOR of two <16 x i8> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.b loads from 0($5) and 0($6), one xor.v on the
; loaded registers, and an st.b of its result to 0($4).
381define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
382  ; CHECK: xor_v16i8:
383
384  %1 = load <16 x i8>* %a
385  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
386  %2 = load <16 x i8>* %b
387  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
388  %3 = xor <16 x i8> %1, %2
389  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
390  store <16 x i8> %3, <16 x i8>* %c
391  ; CHECK-DAG: st.b [[R3]], 0($4)
392
393  ret void
394  ; CHECK: .size xor_v16i8
395}
396
; Bitwise XOR of two <8 x i16> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.h loads, one xor.v, and an st.h of its result
; to 0($4).
397define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
398  ; CHECK: xor_v8i16:
399
400  %1 = load <8 x i16>* %a
401  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
402  %2 = load <8 x i16>* %b
403  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
404  %3 = xor <8 x i16> %1, %2
405  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
406  store <8 x i16> %3, <8 x i16>* %c
407  ; CHECK-DAG: st.h [[R3]], 0($4)
408
409  ret void
410  ; CHECK: .size xor_v8i16
411}
412
; Bitwise XOR of two <4 x i32> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.w loads, one xor.v, and an st.w of its result
; to 0($4).
413define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
414  ; CHECK: xor_v4i32:
415
416  %1 = load <4 x i32>* %a
417  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
418  %2 = load <4 x i32>* %b
419  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
420  %3 = xor <4 x i32> %1, %2
421  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
422  store <4 x i32> %3, <4 x i32>* %c
423  ; CHECK-DAG: st.w [[R3]], 0($4)
424
425  ret void
426  ; CHECK: .size xor_v4i32
427}
428
; Bitwise XOR of two <2 x i64> vectors loaded from %a and %b; result stored to %c.
; The FileCheck lines expect ld.d loads, one xor.v, and an st.d of its result
; to 0($4).
429define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
430  ; CHECK: xor_v2i64:
431
432  %1 = load <2 x i64>* %a
433  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
434  %2 = load <2 x i64>* %b
435  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
436  %3 = xor <2 x i64> %1, %2
437  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
438  store <2 x i64> %3, <2 x i64>* %c
439  ; CHECK-DAG: st.d [[R3]], 0($4)
440
441  ret void
442  ; CHECK: .size xor_v2i64
443}
444
; Bitwise XOR of a <16 x i8> vector loaded from %a with a splat of 3; result
; stored to %c. The FileCheck lines expect the constant to appear as the
; immediate operand of xori.b.
445define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
446  ; CHECK: xor_v16i8_i:
447
448  %1 = load <16 x i8>* %a
449  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
450  %2 = xor <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
451  ; CHECK-DAG: xori.b [[R4:\$w[0-9]+]], [[R1]], 3
452  store <16 x i8> %2, <16 x i8>* %c
453  ; CHECK-DAG: st.b [[R4]], 0($4)
454
455  ret void
456  ; CHECK: .size xor_v16i8_i
457}
458
; Bitwise XOR of a <8 x i16> vector loaded from %a with a splat of 3; result
; stored to %c. The FileCheck lines expect the splat to be built with ldi.h and
; combined with the loaded vector by the register-form xor.v.
459define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
460  ; CHECK: xor_v8i16_i:
461
462  %1 = load <8 x i16>* %a
463  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
464  %2 = xor <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
465  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
466  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
467  store <8 x i16> %2, <8 x i16>* %c
468  ; CHECK-DAG: st.h [[R4]], 0($4)
469
470  ret void
471  ; CHECK: .size xor_v8i16_i
472}
473
; Bitwise XOR of a <4 x i32> vector loaded from %a with a splat of 3; result
; stored to %c. The FileCheck lines expect the splat to be built with ldi.w and
; combined with the loaded vector by the register-form xor.v.
474define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
475  ; CHECK: xor_v4i32_i:
476
477  %1 = load <4 x i32>* %a
478  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
479  %2 = xor <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
480  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
481  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
482  store <4 x i32> %2, <4 x i32>* %c
483  ; CHECK-DAG: st.w [[R4]], 0($4)
484
485  ret void
486  ; CHECK: .size xor_v4i32_i
487}
488
; Bitwise XOR of a <2 x i64> vector loaded from %a with a splat of 3; result
; stored to %c. The FileCheck lines expect the splat to be built with ldi.d and
; combined with the loaded vector by the register-form xor.v.
489define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
490  ; CHECK: xor_v2i64_i:
491
492  %1 = load <2 x i64>* %a
493  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
494  %2 = xor <2 x i64> %1, <i64 3, i64 3>
495  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
496  ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
497  store <2 x i64> %2, <2 x i64>* %c
498  ; CHECK-DAG: st.d [[R4]], 0($4)
499
500  ret void
501  ; CHECK: .size xor_v2i64_i
502}
503
; Left shift (shl) of the <16 x i8> vector loaded from %a by the per-element
; amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.b loads, one sll.b, and an st.b to 0($4).
504define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
505  ; CHECK: sll_v16i8:
506
507  %1 = load <16 x i8>* %a
508  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
509  %2 = load <16 x i8>* %b
510  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
511  %3 = shl <16 x i8> %1, %2
512  ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
513  store <16 x i8> %3, <16 x i8>* %c
514  ; CHECK-DAG: st.b [[R3]], 0($4)
515
516  ret void
517  ; CHECK: .size sll_v16i8
518}
519
; Left shift (shl) of the <8 x i16> vector loaded from %a by the per-element
; amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.h loads, one sll.h, and an st.h to 0($4).
520define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
521  ; CHECK: sll_v8i16:
522
523  %1 = load <8 x i16>* %a
524  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
525  %2 = load <8 x i16>* %b
526  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
527  %3 = shl <8 x i16> %1, %2
528  ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
529  store <8 x i16> %3, <8 x i16>* %c
530  ; CHECK-DAG: st.h [[R3]], 0($4)
531
532  ret void
533  ; CHECK: .size sll_v8i16
534}
535
; Left shift (shl) of the <4 x i32> vector loaded from %a by the per-element
; amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.w loads, one sll.w, and an st.w to 0($4).
536define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
537  ; CHECK: sll_v4i32:
538
539  %1 = load <4 x i32>* %a
540  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
541  %2 = load <4 x i32>* %b
542  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
543  %3 = shl <4 x i32> %1, %2
544  ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
545  store <4 x i32> %3, <4 x i32>* %c
546  ; CHECK-DAG: st.w [[R3]], 0($4)
547
548  ret void
549  ; CHECK: .size sll_v4i32
550}
551
; Left shift (shl) of the <2 x i64> vector loaded from %a by the per-element
; amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.d loads, one sll.d, and an st.d to 0($4).
552define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
553  ; CHECK: sll_v2i64:
554
555  %1 = load <2 x i64>* %a
556  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
557  %2 = load <2 x i64>* %b
558  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
559  %3 = shl <2 x i64> %1, %2
560  ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
561  store <2 x i64> %3, <2 x i64>* %c
562  ; CHECK-DAG: st.d [[R3]], 0($4)
563
564  ret void
565  ; CHECK: .size sll_v2i64
566}
567
; Left shift (shl) of the <16 x i8> vector loaded from %a by a splat of 1;
; result stored to %c. The FileCheck lines expect the shift amount to appear
; as the immediate operand of slli.b.
568define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
569  ; CHECK: sll_v16i8_i:
570
571  %1 = load <16 x i8>* %a
572  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
573  %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
574  ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
575  store <16 x i8> %2, <16 x i8>* %c
576  ; CHECK-DAG: st.b [[R4]], 0($4)
577
578  ret void
579  ; CHECK: .size sll_v16i8_i
580}
581
; Left shift (shl) of the <8 x i16> vector loaded from %a by a splat of 1;
; result stored to %c. The FileCheck lines expect the shift amount to appear
; as the immediate operand of slli.h (no separate constant load is checked).
582define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
583  ; CHECK: sll_v8i16_i:
584
585  %1 = load <8 x i16>* %a
586  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
587  %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
588  ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
589  store <8 x i16> %2, <8 x i16>* %c
590  ; CHECK-DAG: st.h [[R4]], 0($4)
591
592  ret void
593  ; CHECK: .size sll_v8i16_i
594}
595
; Left shift (shl) of the <4 x i32> vector loaded from %a by a splat of 1;
; result stored to %c. The FileCheck lines expect the shift amount to appear
; as the immediate operand of slli.w.
596define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
597  ; CHECK: sll_v4i32_i:
598
599  %1 = load <4 x i32>* %a
600  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
601  %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
602  ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
603  store <4 x i32> %2, <4 x i32>* %c
604  ; CHECK-DAG: st.w [[R4]], 0($4)
605
606  ret void
607  ; CHECK: .size sll_v4i32_i
608}
609
; Left shift (shl) of the <2 x i64> vector loaded from %a by a splat of 1;
; result stored to %c. The FileCheck lines expect the shift amount to appear
; as the immediate operand of slli.d.
610define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
611  ; CHECK: sll_v2i64_i:
612
613  %1 = load <2 x i64>* %a
614  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
615  %2 = shl <2 x i64> %1, <i64 1, i64 1>
616  ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
617  store <2 x i64> %2, <2 x i64>* %c
618  ; CHECK-DAG: st.d [[R4]], 0($4)
619
620  ret void
621  ; CHECK: .size sll_v2i64_i
622}
623
; Arithmetic right shift (ashr) of the <16 x i8> vector loaded from %a by the
; per-element amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.b loads, one sra.b, and an st.b to 0($4).
624define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
625  ; CHECK: sra_v16i8:
626
627  %1 = load <16 x i8>* %a
628  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
629  %2 = load <16 x i8>* %b
630  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
631  %3 = ashr <16 x i8> %1, %2
632  ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
633  store <16 x i8> %3, <16 x i8>* %c
634  ; CHECK-DAG: st.b [[R3]], 0($4)
635
636  ret void
637  ; CHECK: .size sra_v16i8
638}
639
; Arithmetic right shift (ashr) of the <8 x i16> vector loaded from %a by the
; per-element amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.h loads, one sra.h, and an st.h to 0($4).
640define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
641  ; CHECK: sra_v8i16:
642
643  %1 = load <8 x i16>* %a
644  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
645  %2 = load <8 x i16>* %b
646  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
647  %3 = ashr <8 x i16> %1, %2
648  ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
649  store <8 x i16> %3, <8 x i16>* %c
650  ; CHECK-DAG: st.h [[R3]], 0($4)
651
652  ret void
653  ; CHECK: .size sra_v8i16
654}
655
; Arithmetic right shift (ashr) of the <4 x i32> vector loaded from %a by the
; per-element amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.w loads, one sra.w, and an st.w to 0($4).
656define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
657  ; CHECK: sra_v4i32:
658
659  %1 = load <4 x i32>* %a
660  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
661  %2 = load <4 x i32>* %b
662  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
663  %3 = ashr <4 x i32> %1, %2
664  ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
665  store <4 x i32> %3, <4 x i32>* %c
666  ; CHECK-DAG: st.w [[R3]], 0($4)
667
668  ret void
669  ; CHECK: .size sra_v4i32
670}
671
; Arithmetic right shift (ashr) of the <2 x i64> vector loaded from %a by the
; per-element amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.d loads, one sra.d, and an st.d to 0($4).
672define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
673  ; CHECK: sra_v2i64:
674
675  %1 = load <2 x i64>* %a
676  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
677  %2 = load <2 x i64>* %b
678  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
679  %3 = ashr <2 x i64> %1, %2
680  ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
681  store <2 x i64> %3, <2 x i64>* %c
682  ; CHECK-DAG: st.d [[R3]], 0($4)
683
684  ret void
685  ; CHECK: .size sra_v2i64
686}
687
; Arithmetic right shift (ashr) of the <16 x i8> vector loaded from %a by a
; splat of 1; result stored to %c. The FileCheck lines expect the shift amount
; to appear as the immediate operand of srai.b.
688define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
689  ; CHECK: sra_v16i8_i:
690
691  %1 = load <16 x i8>* %a
692  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
693  %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
694  ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
695  store <16 x i8> %2, <16 x i8>* %c
696  ; CHECK-DAG: st.b [[R4]], 0($4)
697
698  ret void
699  ; CHECK: .size sra_v16i8_i
700}
701
; Arithmetic right shift (ashr) of the <8 x i16> vector loaded from %a by a
; splat of 1; result stored to %c. The FileCheck lines expect the shift amount
; to appear as the immediate operand of srai.h.
702define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
703  ; CHECK: sra_v8i16_i:
704
705  %1 = load <8 x i16>* %a
706  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
707  %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
708  ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
709  store <8 x i16> %2, <8 x i16>* %c
710  ; CHECK-DAG: st.h [[R4]], 0($4)
711
712  ret void
713  ; CHECK: .size sra_v8i16_i
714}
715
; Arithmetic right shift (ashr) of the <4 x i32> vector loaded from %a by a
; splat of 1; result stored to %c. The FileCheck lines expect the shift amount
; to appear as the immediate operand of srai.w.
716define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
717  ; CHECK: sra_v4i32_i:
718
719  %1 = load <4 x i32>* %a
720  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
721  %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
722  ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
723  store <4 x i32> %2, <4 x i32>* %c
724  ; CHECK-DAG: st.w [[R4]], 0($4)
725
726  ret void
727  ; CHECK: .size sra_v4i32_i
728}
729
; Arithmetic right shift (ashr) of the <2 x i64> vector loaded from %a by a
; splat of 1; result stored to %c. The FileCheck lines expect the shift amount
; to appear as the immediate operand of srai.d.
730define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
731  ; CHECK: sra_v2i64_i:
732
733  %1 = load <2 x i64>* %a
734  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
735  %2 = ashr <2 x i64> %1, <i64 1, i64 1>
736  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
737  store <2 x i64> %2, <2 x i64>* %c
738  ; CHECK-DAG: st.d [[R4]], 0($4)
739
740  ret void
741  ; CHECK: .size sra_v2i64_i
742}
743
; Logical right shift (lshr) of the <16 x i8> vector loaded from %a by the
; per-element amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.b loads, one srl.b, and an st.b to 0($4).
744define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
745  ; CHECK: srl_v16i8:
746
747  %1 = load <16 x i8>* %a
748  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
749  %2 = load <16 x i8>* %b
750  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
751  %3 = lshr <16 x i8> %1, %2
752  ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
753  store <16 x i8> %3, <16 x i8>* %c
754  ; CHECK-DAG: st.b [[R3]], 0($4)
755
756  ret void
757  ; CHECK: .size srl_v16i8
758}
759
; Logical right shift (lshr) of the <8 x i16> vector loaded from %a by the
; per-element amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.h loads, one srl.h, and an st.h to 0($4).
760define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
761  ; CHECK: srl_v8i16:
762
763  %1 = load <8 x i16>* %a
764  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
765  %2 = load <8 x i16>* %b
766  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
767  %3 = lshr <8 x i16> %1, %2
768  ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
769  store <8 x i16> %3, <8 x i16>* %c
770  ; CHECK-DAG: st.h [[R3]], 0($4)
771
772  ret void
773  ; CHECK: .size srl_v8i16
774}
775
; Logical right shift (lshr) of the <4 x i32> vector loaded from %a by the
; per-element amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.w loads, one srl.w, and an st.w to 0($4).
776define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
777  ; CHECK: srl_v4i32:
778
779  %1 = load <4 x i32>* %a
780  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
781  %2 = load <4 x i32>* %b
782  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
783  %3 = lshr <4 x i32> %1, %2
784  ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
785  store <4 x i32> %3, <4 x i32>* %c
786  ; CHECK-DAG: st.w [[R3]], 0($4)
787
788  ret void
789  ; CHECK: .size srl_v4i32
790}
791
; Logical right shift (lshr) of the <2 x i64> vector loaded from %a by the
; per-element amounts loaded from %b; result stored to %c.
; The FileCheck lines expect ld.d loads, one srl.d, and an st.d to 0($4).
792define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
793  ; CHECK: srl_v2i64:
794
795  %1 = load <2 x i64>* %a
796  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
797  %2 = load <2 x i64>* %b
798  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
799  %3 = lshr <2 x i64> %1, %2
800  ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
801  store <2 x i64> %3, <2 x i64>* %c
802  ; CHECK-DAG: st.d [[R3]], 0($4)
803
804  ret void
805  ; CHECK: .size srl_v2i64
806}
807
; Logical right shift (lshr) of the <16 x i8> vector loaded from %a by a splat
; of 1; result stored to %c. The FileCheck lines expect the shift amount to
; appear as the immediate operand of srli.b.
808define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
809  ; CHECK: srl_v16i8_i:
810
811  %1 = load <16 x i8>* %a
812  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
813  %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
814  ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
815  store <16 x i8> %2, <16 x i8>* %c
816  ; CHECK-DAG: st.b [[R4]], 0($4)
817
818  ret void
819  ; CHECK: .size srl_v16i8_i
820}
821
; Logical right shift (lshr) of the <8 x i16> vector loaded from %a by a splat
; of 1; result stored to %c. The FileCheck lines expect the shift amount to
; appear as the immediate operand of srli.h.
822define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
823  ; CHECK: srl_v8i16_i:
824
825  %1 = load <8 x i16>* %a
826  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
827  %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
828  ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
829  store <8 x i16> %2, <8 x i16>* %c
830  ; CHECK-DAG: st.h [[R4]], 0($4)
831
832  ret void
833  ; CHECK: .size srl_v8i16_i
834}
835
; Logical right shift (lshr) of the <4 x i32> vector loaded from %a by a splat
; of 1; result stored to %c. The FileCheck lines expect the shift amount to
; appear as the immediate operand of srli.w.
836define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
837  ; CHECK: srl_v4i32_i:
838
839  %1 = load <4 x i32>* %a
840  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
841  %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
842  ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
843  store <4 x i32> %2, <4 x i32>* %c
844  ; CHECK-DAG: st.w [[R4]], 0($4)
845
846  ret void
847  ; CHECK: .size srl_v4i32_i
848}
849
; Logical right shift (lshr) of the <2 x i64> vector loaded from %a by a splat
; of 1; result stored to %c. The FileCheck lines expect the shift amount to
; appear as the immediate operand of srli.d.
850define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
851  ; CHECK: srl_v2i64_i:
852
853  %1 = load <2 x i64>* %a
854  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
855  %2 = lshr <2 x i64> %1, <i64 1, i64 1>
856  ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
857  store <2 x i64> %2, <2 x i64>* %c
858  ; CHECK-DAG: st.d [[R4]], 0($4)
859
860  ret void
861  ; CHECK: .size srl_v2i64_i
862}
863
; Per-element population count of the <16 x i8> vector loaded from %a, via a
; call to @llvm.ctpop.v16i8; result stored to %c.
; The FileCheck lines expect the call to become pcnt.b on the loaded register.
864define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
865  ; CHECK: ctpop_v16i8:
866
867  %1 = load <16 x i8>* %a
868  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
869  %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
870  ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
871  store <16 x i8> %2, <16 x i8>* %c
872  ; CHECK-DAG: st.b [[R3]], 0($4)
873
874  ret void
875  ; CHECK: .size ctpop_v16i8
876}
877
; Per-element population count of the <8 x i16> vector loaded from %a, via a
; call to @llvm.ctpop.v8i16; result stored to %c.
; The FileCheck lines expect the call to become pcnt.h on the loaded register.
878define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
879  ; CHECK: ctpop_v8i16:
880
881  %1 = load <8 x i16>* %a
882  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
883  %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
884  ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
885  store <8 x i16> %2, <8 x i16>* %c
886  ; CHECK-DAG: st.h [[R3]], 0($4)
887
888  ret void
889  ; CHECK: .size ctpop_v8i16
890}
891
; Per-element population count of the <4 x i32> vector loaded from %a, via a
; call to @llvm.ctpop.v4i32; result stored to %c.
; The FileCheck lines expect the call to become pcnt.w on the loaded register.
892define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
893  ; CHECK: ctpop_v4i32:
894
895  %1 = load <4 x i32>* %a
896  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
897  %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
898  ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
899  store <4 x i32> %2, <4 x i32>* %c
900  ; CHECK-DAG: st.w [[R3]], 0($4)
901
902  ret void
903  ; CHECK: .size ctpop_v4i32
904}
905
; Per-element population count of the <2 x i64> vector loaded from %a, via a
; call to @llvm.ctpop.v2i64; result stored to %c.
; The FileCheck lines expect the call to become pcnt.d on the loaded register.
906define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
907  ; CHECK: ctpop_v2i64:
908
909  %1 = load <2 x i64>* %a
910  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
911  %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
912  ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
913  store <2 x i64> %2, <2 x i64>* %c
914  ; CHECK-DAG: st.d [[R3]], 0($4)
915
916  ret void
917  ; CHECK: .size ctpop_v2i64
918}
919
; Per-element leading-zero count of the <16 x i8> vector loaded from %a, via a
; call to @llvm.ctlz.v16i8; result stored to %c.
; The FileCheck lines expect the call to become nlzc.b on the loaded register.
; NOTE(review): the intrinsic is called with a single operand here; its
; declaration is not in this part of the file - confirm the two agree.
920define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
921  ; CHECK: ctlz_v16i8:
922
923  %1 = load <16 x i8>* %a
924  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
925  %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
926  ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
927  store <16 x i8> %2, <16 x i8>* %c
928  ; CHECK-DAG: st.b [[R3]], 0($4)
929
930  ret void
931  ; CHECK: .size ctlz_v16i8
932}
933
; Per-element leading-zero count of the <8 x i16> vector loaded from %a, via a
; call to @llvm.ctlz.v8i16; result stored to %c.
; The FileCheck lines expect the call to become nlzc.h on the loaded register.
934define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
935  ; CHECK: ctlz_v8i16:
936
937  %1 = load <8 x i16>* %a
938  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
939  %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
940  ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
941  store <8 x i16> %2, <8 x i16>* %c
942  ; CHECK-DAG: st.h [[R3]], 0($4)
943
944  ret void
945  ; CHECK: .size ctlz_v8i16
946}
947
; Per-element leading-zero count of the <4 x i32> vector loaded from %a, via a
; call to @llvm.ctlz.v4i32; result stored to %c.
; The FileCheck lines expect the call to become nlzc.w on the loaded register.
948define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
949  ; CHECK: ctlz_v4i32:
950
951  %1 = load <4 x i32>* %a
952  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
953  %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
954  ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
955  store <4 x i32> %2, <4 x i32>* %c
956  ; CHECK-DAG: st.w [[R3]], 0($4)
957
958  ret void
959  ; CHECK: .size ctlz_v4i32
960}
961
; Per-element leading-zero count of the <2 x i64> vector loaded from %a, via a
; call to @llvm.ctlz.v2i64; result stored to %c.
; The FileCheck lines expect the call to become nlzc.d on the loaded register.
962define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
963  ; CHECK: ctlz_v2i64:
964
965  %1 = load <2 x i64>* %a
966  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
967  %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
968  ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
969  store <2 x i64> %2, <2 x i64>* %c
970  ; CHECK-DAG: st.d [[R3]], 0($4)
971
972  ret void
973  ; CHECK: .size ctlz_v2i64
974}
975
; Bit select between two <16 x i8> vectors under a variable mask: the IR
; computes (a & m) | (b & ~m) with a, b and m loaded from %a, %b and %m, and
; stores the result to %c.
; The FileCheck lines expect loads from 0($5), 0($6) and 0($7), a bmnz.v that
; overwrites the register holding the first load, and an st.b of that same
; register to 0($4).
; NOTE(review): as I read the MSA definition, bmnz.v wd, ws, wt computes
; (ws & wt) | (wd & ~wt). With the operand order checked below that would be
; (b & m) | (a & ~m), the opposite selection from the IR - confirm the
; expected operand order against the backend before relying on this test.
976define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind {
977  ; CHECK: bsel_v16i8:
978
979  %1 = load <16 x i8>* %a
980  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
981  %2 = load <16 x i8>* %b
982  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
983  %3 = load <16 x i8>* %m
984  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
985  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1,
986                          i8 -1, i8 -1, i8 -1, i8 -1,
987                          i8 -1, i8 -1, i8 -1, i8 -1,
988                          i8 -1, i8 -1, i8 -1, i8 -1>
989  %5 = and <16 x i8> %1, %3
990  %6 = and <16 x i8> %2, %4
991  %7 = or <16 x i8> %5, %6
992  ; bmnz is the same operation
993  ; CHECK-DAG: bmnz.v [[R1]], [[R2]], [[R3]]
994  store <16 x i8> %7, <16 x i8>* %c
995  ; CHECK-DAG: st.b [[R1]], 0($4)
996
997  ret void
998  ; CHECK: .size bsel_v16i8
999}
1000
; Bit select between a <16 x i8> vector and a splat constant under a variable
; mask: the IR computes (a & ~m) | (6 & m) with a and m loaded from %a and %m,
; and stores the result to %c.
; The FileCheck lines expect bseli.b with immediate 6 to overwrite the register
; holding the mask (loaded from 0($6)), and that register to be stored with
; st.b to 0($4).
1001define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind {
1002  ; CHECK: bsel_v16i8_i:
1003
1004  %1 = load <16 x i8>* %a
1005  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1006  %2 = load <16 x i8>* %m
1007  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6)
1008  %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1,
1009                          i8 -1, i8 -1, i8 -1, i8 -1,
1010                          i8 -1, i8 -1, i8 -1, i8 -1,
1011                          i8 -1, i8 -1, i8 -1, i8 -1>
1012  %4 = and <16 x i8> %1, %3
1013  %5 = and <16 x i8> <i8 6, i8 6, i8 6, i8 6,
1014                      i8 6, i8 6, i8 6, i8 6,
1015                      i8 6, i8 6, i8 6, i8 6,
1016                      i8 6, i8 6, i8 6, i8 6>, %2
1017  %6 = or <16 x i8> %4, %5
1018  ; CHECK-DAG: bseli.b [[R3]], [[R1]], 6
1019  store <16 x i8> %6, <16 x i8>* %c
1020  ; CHECK-DAG: st.b [[R3]], 0($4)
1021
1022  ret void
1023  ; CHECK: .size bsel_v16i8_i
1024}
1025
define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; Bit select on <8 x i16> with a constant mask of 6 (65529 is its
  ; 16-bit complement): *%c = (*%a & 6) | (*%b & ~6).
  ; CHECK: bsel_v8i16:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %from_lhs = and <8 x i16> %lhs,
      <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
  %from_rhs = and <8 x i16> %rhs,
      <i16 65529, i16 65529, i16 65529, i16 65529,
       i16 65529, i16 65529, i16 65529, i16 65529>
  %merged = or <8 x i16> %from_lhs, %from_rhs
  ; The mask is expected to be materialised with ldi.h and then consumed
  ; by bsel.v.
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <8 x i16> %merged, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_v8i16
}
1046
define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; Bit select on <4 x i32> with a constant mask of 6 (4294967289 is its
  ; 32-bit complement): *%c = (*%a & 6) | (*%b & ~6).
  ; CHECK: bsel_v4i32:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %from_lhs = and <4 x i32> %lhs, <i32 6, i32 6, i32 6, i32 6>
  %from_rhs = and <4 x i32> %rhs,
      <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289>
  %merged = or <4 x i32> %from_lhs, %from_rhs
  ; The mask is expected to be materialised with ldi.w and then consumed
  ; by bsel.v.
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <4 x i32> %merged, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_v4i32
}
1065
define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; Bit select on <2 x i64> with a constant mask of 6
  ; (18446744073709551609 is its 64-bit complement):
  ;   *%c = (*%a & 6) | (*%b & ~6)
  ; CHECK: bsel_v2i64:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %from_lhs = and <2 x i64> %lhs, <i64 6, i64 6>
  %from_rhs = and <2 x i64> %rhs,
      <i64 18446744073709551609, i64 18446744073709551609>
  %merged = or <2 x i64> %from_lhs, %from_rhs
  ; The mask is expected to be materialised with ldi.d and then consumed
  ; by bsel.v.
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 6
  ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
  store <2 x i64> %merged, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_v2i64
}
1084
define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; Insert-left with an immediate on <16 x i8>: the two most significant
  ; bits come from *%a (mask 192), the other six from *%b (mask 63).
  ; CHECK: binsl_v16i8_i:

  %hi_src = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %lo_src = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %hi_bits = and <16 x i8> %hi_src,
      <i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192,
       i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192, i8 192>
  %lo_bits = and <16 x i8> %lo_src,
      <i8 63, i8 63, i8 63, i8 63, i8 63, i8 63, i8 63, i8 63,
       i8 63, i8 63, i8 63, i8 63, i8 63, i8 63, i8 63, i8 63>
  %merged = or <16 x i8> %hi_bits, %lo_bits
  ; CHECK-DAG: binsli.b [[R2]], [[R1]], 2
  store <16 x i8> %merged, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R2]], 0($4)

  ret void
  ; CHECK: .size binsl_v16i8_i
}
1108
define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; Insert-left with an immediate on <8 x i16>: the two most significant
  ; bits come from *%a (mask 49152), the rest from *%b (mask 16383).
  ; CHECK: binsl_v8i16_i:

  %hi_src = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %lo_src = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %hi_bits = and <8 x i16> %hi_src,
      <i16 49152, i16 49152, i16 49152, i16 49152,
       i16 49152, i16 49152, i16 49152, i16 49152>
  %lo_bits = and <8 x i16> %lo_src,
      <i16 16383, i16 16383, i16 16383, i16 16383,
       i16 16383, i16 16383, i16 16383, i16 16383>
  %merged = or <8 x i16> %hi_bits, %lo_bits
  ; CHECK-DAG: binsli.h [[R2]], [[R1]], 2
  store <8 x i16> %merged, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R2]], 0($4)

  ret void
  ; CHECK: .size binsl_v8i16_i
}
1128
define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; Insert-left with an immediate on <4 x i32>: the two most significant
  ; bits come from *%a (mask 3221225472), the rest from *%b
  ; (mask 1073741823).
  ; CHECK: binsl_v4i32_i:

  %hi_src = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %lo_src = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %hi_bits = and <4 x i32> %hi_src,
      <i32 3221225472, i32 3221225472, i32 3221225472, i32 3221225472>
  %lo_bits = and <4 x i32> %lo_src,
      <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
  %merged = or <4 x i32> %hi_bits, %lo_bits
  ; CHECK-DAG: binsli.w [[R2]], [[R1]], 2
  store <4 x i32> %merged, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R2]], 0($4)

  ret void
  ; CHECK: .size binsl_v4i32_i
}
1146
define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; Insert-left with an immediate on <2 x i64>: everything except the low
  ; three bits comes from *%a (mask 18446744073709551608), the low three
  ; bits come from *%b (mask 7).
  ; CHECK: binsl_v2i64_i:

  %hi_src = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %lo_src = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %hi_bits = and <2 x i64> %hi_src,
      <i64 18446744073709551608, i64 18446744073709551608>
  %lo_bits = and <2 x i64> %lo_src, <i64 7, i64 7>
  %merged = or <2 x i64> %hi_bits, %lo_bits
  ; TODO: The mask is deliberately very wide to side-step a legalization
  ;       issue: a mask that does not fit in a 10-bit immediate is
  ;       legalized into a constant pool. Tests for the remaining cases
  ;       should be added once they correctly select binsli.d.
  ; CHECK-DAG: binsli.d [[R2]], [[R1]], 61
  store <2 x i64> %merged, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R2]], 0($4)

  ret void
  ; CHECK: .size binsl_v2i64_i
}
1168
define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; Insert-right with an immediate on <16 x i8>: the two least significant
  ; bits come from *%a (mask 3), the other six from *%b (mask 252).
  ; CHECK: binsr_v16i8_i:

  %lo_src = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %hi_src = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %lo_bits = and <16 x i8> %lo_src,
      <i8 3, i8 3, i8 3, i8 3,
       i8 3, i8 3, i8 3, i8 3,
       i8 3, i8 3, i8 3, i8 3,
       i8 3, i8 3, i8 3, i8 3>
  %hi_bits = and <16 x i8> %hi_src,
      <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252,
       i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
  %merged = or <16 x i8> %lo_bits, %hi_bits
  ; CHECK-DAG: binsri.b [[R2]], [[R1]], 2
  store <16 x i8> %merged, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R2]], 0($4)

  ret void
  ; CHECK: .size binsr_v16i8_i
}
1190
define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; Insert-right with an immediate on <8 x i16>: the two least significant
  ; bits come from *%a (mask 3), the rest from *%b (mask 65532).
  ; CHECK: binsr_v8i16_i:

  %lo_src = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %hi_src = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %lo_bits = and <8 x i16> %lo_src,
      <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %hi_bits = and <8 x i16> %hi_src,
      <i16 65532, i16 65532, i16 65532, i16 65532,
       i16 65532, i16 65532, i16 65532, i16 65532>
  %merged = or <8 x i16> %lo_bits, %hi_bits
  ; CHECK-DAG: binsri.h [[R2]], [[R1]], 2
  store <8 x i16> %merged, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R2]], 0($4)

  ret void
  ; CHECK: .size binsr_v8i16_i
}
1210
define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; Insert-right with an immediate on <4 x i32>: the two least significant
  ; bits come from *%a (mask 3), the rest from *%b (mask 4294967292).
  ; CHECK: binsr_v4i32_i:

  %lo_src = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %hi_src = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %lo_bits = and <4 x i32> %lo_src, <i32 3, i32 3, i32 3, i32 3>
  %hi_bits = and <4 x i32> %hi_src,
      <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292>
  %merged = or <4 x i32> %lo_bits, %hi_bits
  ; CHECK-DAG: binsri.w [[R2]], [[R1]], 2
  store <4 x i32> %merged, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R2]], 0($4)

  ret void
  ; CHECK: .size binsr_v4i32_i
}
1228
define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; Insert-right with an immediate on <2 x i64>: the two least significant
  ; bits come from *%a (mask 3), the rest from *%b
  ; (mask 18446744073709551612).
  ; CHECK: binsr_v2i64_i:

  %lo_src = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %hi_src = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %lo_bits = and <2 x i64> %lo_src, <i64 3, i64 3>
  %hi_bits = and <2 x i64> %hi_src,
      <i64 18446744073709551612, i64 18446744073709551612>
  %merged = or <2 x i64> %lo_bits, %hi_bits
  ; CHECK-DAG: binsri.d [[R2]], [[R1]], 2
  store <2 x i64> %merged, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R2]], 0($4)

  ret void
  ; CHECK: .size binsr_v2i64_i
}
1246
define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; Per-lane bit clear on <16 x i8>: *%c = *%a & ~(1 << *%b).
  ; CHECK: bclr_v16i8:

  %val = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %idx = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <16 x i8>
      <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
       i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %idx
  %keep = xor <16 x i8> %bit,
      <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
       i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %cleared = and <16 x i8> %val, %keep
  ; CHECK-DAG: bclr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %cleared, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bclr_v16i8
}
1264
define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; Per-lane bit clear on <8 x i16>: *%c = *%a & ~(1 << *%b).
  ; CHECK: bclr_v8i16:

  %val = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %idx = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <8 x i16>
      <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %idx
  %keep = xor <8 x i16> %bit,
      <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %cleared = and <8 x i16> %val, %keep
  ; CHECK-DAG: bclr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %cleared, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bclr_v8i16
}
1282
define void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; Per-lane bit clear on <4 x i32>: *%c = *%a & ~(1 << *%b).
  ; CHECK: bclr_v4i32:

  %val = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %idx = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <4 x i32>
      <i32 1, i32 1, i32 1, i32 1>, %idx
  %keep = xor <4 x i32> %bit,
      <i32 -1, i32 -1, i32 -1, i32 -1>
  %cleared = and <4 x i32> %val, %keep
  ; CHECK-DAG: bclr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %cleared, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bclr_v4i32
}
1300
define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; Per-lane bit clear on <2 x i64>: *%c = *%a & ~(1 << *%b).
  ; CHECK: bclr_v2i64:

  %val = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %idx = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <2 x i64>
      <i64 1, i64 1>, %idx
  %keep = xor <2 x i64> %bit,
      <i64 -1, i64 -1>
  %cleared = and <2 x i64> %val, %keep
  ; CHECK-DAG: bclr.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %cleared, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bclr_v2i64
}
1318
define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; Per-lane bit set on <16 x i8>: *%c = *%a | (1 << *%b).
  ; CHECK: bset_v16i8:

  %val = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %idx = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <16 x i8>
      <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
       i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %idx
  %with_bit = or <16 x i8> %val, %bit
  ; CHECK-DAG: bset.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %with_bit, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bset_v16i8
}
1335
define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; Per-lane bit set on <8 x i16>: *%c = *%a | (1 << *%b).
  ; CHECK: bset_v8i16:

  %val = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %idx = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <8 x i16>
      <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %idx
  %with_bit = or <8 x i16> %val, %bit
  ; CHECK-DAG: bset.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %with_bit, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bset_v8i16
}
1352
define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; Per-lane bit set on <4 x i32>: *%c = *%a | (1 << *%b).
  ; CHECK: bset_v4i32:

  %val = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %idx = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <4 x i32>
      <i32 1, i32 1, i32 1, i32 1>, %idx
  %with_bit = or <4 x i32> %val, %bit
  ; CHECK-DAG: bset.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %with_bit, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bset_v4i32
}
1369
define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; Per-lane bit set on <2 x i64>: *%c = *%a | (1 << *%b).
  ; CHECK: bset_v2i64:

  %val = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %idx = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <2 x i64>
      <i64 1, i64 1>, %idx
  %with_bit = or <2 x i64> %val, %bit
  ; CHECK-DAG: bset.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %with_bit, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bset_v2i64
}
1386
define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; Per-lane bit toggle on <16 x i8>: *%c = *%a ^ (1 << *%b).
  ; CHECK: bneg_v16i8:

  %val = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %idx = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <16 x i8>
      <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
       i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %idx
  %toggled = xor <16 x i8> %val, %bit
  ; CHECK-DAG: bneg.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %toggled, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bneg_v16i8
}
1403
define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; Per-lane bit toggle on <8 x i16>: *%c = *%a ^ (1 << *%b).
  ; CHECK: bneg_v8i16:

  %val = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %idx = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <8 x i16>
      <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %idx
  %toggled = xor <8 x i16> %val, %bit
  ; CHECK-DAG: bneg.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %toggled, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bneg_v8i16
}
1420
define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; Per-lane bit toggle on <4 x i32>: *%c = *%a ^ (1 << *%b).
  ; CHECK: bneg_v4i32:

  %val = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %idx = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <4 x i32>
      <i32 1, i32 1, i32 1, i32 1>, %idx
  %toggled = xor <4 x i32> %val, %bit
  ; CHECK-DAG: bneg.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %toggled, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bneg_v4i32
}
1437
define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; Per-lane bit toggle on <2 x i64>: *%c = *%a ^ (1 << *%b).
  ; CHECK: bneg_v2i64:

  %val = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %idx = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %bit = shl <2 x i64>
      <i64 1, i64 1>, %idx
  %toggled = xor <2 x i64> %val, %bit
  ; CHECK-DAG: bneg.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %toggled, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bneg_v2i64
}
1454
define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; Clear bit 3 of every <16 x i8> lane: *%c = *%a & (8 ^ -1).
  ; CHECK: bclri_v16i8:

  %val = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %keep = xor <16 x i8>
      <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8,
       i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>,
      <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
       i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %cleared = and <16 x i8> %val, %keep
  ; For byte lanes bclri.b and andi.b are exactly equivalent, so the andi.b
  ; form with mask 247 is what is looked for.
  ; CHECK-DAG: andi.b [[R3:\$w[0-9]+]], [[R1]], 247
  store <16 x i8> %cleared, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bclri_v16i8
}
1471
define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; Clear bit 3 of every <8 x i16> lane: *%c = *%a & (8 ^ -1).
  ; CHECK: bclri_v8i16:

  %val = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %keep = xor <8 x i16>
      <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>,
      <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %cleared = and <8 x i16> %val, %keep
  ; CHECK-DAG: bclri.h [[R3:\$w[0-9]+]], [[R1]], 3
  store <8 x i16> %cleared, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bclri_v8i16
}
1487
define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; Clear bit 3 of every <4 x i32> lane: *%c = *%a & (8 ^ -1).
  ; CHECK: bclri_v4i32:

  %val = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %keep = xor <4 x i32> <i32 8, i32 8, i32 8, i32 8>, <i32 -1, i32 -1, i32 -1, i32 -1>
  %cleared = and <4 x i32> %val, %keep
  ; CHECK-DAG: bclri.w [[R3:\$w[0-9]+]], [[R1]], 3
  store <4 x i32> %cleared, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bclri_v4i32
}
1503
define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; Clear bit 3 of every <2 x i64> lane: *%c = *%a & (8 ^ -1).
  ; CHECK: bclri_v2i64:

  %val = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %keep = xor <2 x i64> <i64 8, i64 8>, <i64 -1, i64 -1>
  %cleared = and <2 x i64> %val, %keep
  ; CHECK-DAG: bclri.d [[R3:\$w[0-9]+]], [[R1]], 3
  store <2 x i64> %cleared, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bclri_v2i64
}
1519
define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; Set bit 3 of every <16 x i8> lane: *%c = *%a | 8.
  ; CHECK: bseti_v16i8:

  %val = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %with_bit = or <16 x i8> %val,
      <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8,
       i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
  ; CHECK-DAG: bseti.b [[R3:\$w[0-9]+]], [[R1]], 3
  store <16 x i8> %with_bit, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bseti_v16i8
}
1533
define void @bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; Set bit 3 of every <8 x i16> lane: *%c = *%a | 8.
  ; CHECK: bseti_v8i16:

  %val = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %with_bit = or <8 x i16> %val,
      <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ; CHECK-DAG: bseti.h [[R3:\$w[0-9]+]], [[R1]], 3
  store <8 x i16> %with_bit, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bseti_v8i16
}
1547
define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; Set bit 3 of every <4 x i32> lane: *%c = *%a | 8.
  ; CHECK: bseti_v4i32:

  %val = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %with_bit = or <4 x i32> %val,
      <i32 8, i32 8, i32 8, i32 8>
  ; CHECK-DAG: bseti.w [[R3:\$w[0-9]+]], [[R1]], 3
  store <4 x i32> %with_bit, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bseti_v4i32
}
1561
define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; Set bit 3 of every <2 x i64> lane: *%c = *%a | 8.
  ; CHECK: bseti_v2i64:

  %val = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %with_bit = or <2 x i64> %val,
      <i64 8, i64 8>
  ; CHECK-DAG: bseti.d [[R3:\$w[0-9]+]], [[R1]], 3
  store <2 x i64> %with_bit, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bseti_v2i64
}
1575
define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; Toggle bit 3 of every <16 x i8> lane: *%c = *%a ^ 8.
  ; CHECK: bnegi_v16i8:

  %val = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %toggled = xor <16 x i8> %val,
      <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8,
       i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
  ; CHECK-DAG: bnegi.b [[R3:\$w[0-9]+]], [[R1]], 3
  store <16 x i8> %toggled, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bnegi_v16i8
}
1589
define void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; Toggle bit 3 of every <8 x i16> lane: *%c = *%a ^ 8.
  ; CHECK: bnegi_v8i16:

  %val = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %toggled = xor <8 x i16> %val,
      <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ; CHECK-DAG: bnegi.h [[R3:\$w[0-9]+]], [[R1]], 3
  store <8 x i16> %toggled, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bnegi_v8i16
}
1603
define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; Toggle bit 3 of every <4 x i32> lane: *%c = *%a ^ 8.
  ; CHECK: bnegi_v4i32:

  %val = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %toggled = xor <4 x i32> %val,
      <i32 8, i32 8, i32 8, i32 8>
  ; CHECK-DAG: bnegi.w [[R3:\$w[0-9]+]], [[R1]], 3
  store <4 x i32> %toggled, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bnegi_v4i32
}
1617
define void @bnegi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; Toggle bit 3 of every <2 x i64> lane: *%c = *%a ^ 8.
  ; CHECK: bnegi_v2i64:

  %val = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %toggled = xor <2 x i64> %val,
      <i64 8, i64 8>
  ; CHECK-DAG: bnegi.d [[R3:\$w[0-9]+]], [[R1]], 3
  store <2 x i64> %toggled, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bnegi_v2i64
}
1631
; Prototypes for the vector population-count intrinsics, one per element width.
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
; Prototypes for the vector count-leading-zeros intrinsics, in the
; single-operand form that the calls in this file use.
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>)
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
1640