1; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
2; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
3
; Equality on <16 x i8>: 'icmp eq' plus 'sext' of the i1 mask is checked to
; select ceq.b on the two loaded vectors, in source operand order.
define void @ceq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ceq_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp eq <16 x i8> %1, %2
  %4 = sext <16 x i1> %3 to <16 x i8>
  ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ceq_v16i8
}
20
; Equality on <8 x i16>: 'icmp eq' plus 'sext' of the i1 mask is checked to
; select ceq.h on the two loaded vectors, in source operand order.
define void @ceq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ceq_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp eq <8 x i16> %1, %2
  %4 = sext <8 x i1> %3 to <8 x i16>
  ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ceq_v8i16
}
37
; Equality on <4 x i32>: 'icmp eq' plus 'sext' of the i1 mask is checked to
; select ceq.w on the two loaded vectors, in source operand order.
define void @ceq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ceq_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp eq <4 x i32> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ceq_v4i32
}
54
; Equality on <2 x i64>: 'icmp eq' plus 'sext' of the i1 mask is checked to
; select ceq.d on the two loaded vectors, in source operand order.
define void @ceq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ceq_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp eq <2 x i64> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ceq_v2i64
}
71
; Signed <= on <16 x i8>: 'icmp sle' plus 'sext' of the i1 mask is checked to
; select cle_s.b on the two loaded vectors, in source operand order.
define void @cle_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: cle_s_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp sle <16 x i8> %1, %2
  %4 = sext <16 x i1> %3 to <16 x i8>
  ; CHECK-DAG: cle_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size cle_s_v16i8
}
88
; Signed <= on <8 x i16>: 'icmp sle' plus 'sext' of the i1 mask is checked to
; select cle_s.h on the two loaded vectors, in source operand order.
define void @cle_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: cle_s_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp sle <8 x i16> %1, %2
  %4 = sext <8 x i1> %3 to <8 x i16>
  ; CHECK-DAG: cle_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size cle_s_v8i16
}
105
; Signed <= on <4 x i32>: 'icmp sle' plus 'sext' of the i1 mask is checked to
; select cle_s.w on the two loaded vectors, in source operand order.
define void @cle_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: cle_s_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp sle <4 x i32> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: cle_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size cle_s_v4i32
}
122
; Signed <= on <2 x i64>: 'icmp sle' plus 'sext' of the i1 mask is checked to
; select cle_s.d on the two loaded vectors, in source operand order.
define void @cle_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: cle_s_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp sle <2 x i64> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: cle_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size cle_s_v2i64
}
139
; Unsigned <= on <16 x i8>: 'icmp ule' plus 'sext' of the i1 mask is checked
; to select cle_u.b on the two loaded vectors, in source operand order.
define void @cle_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: cle_u_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ule <16 x i8> %1, %2
  %4 = sext <16 x i1> %3 to <16 x i8>
  ; CHECK-DAG: cle_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size cle_u_v16i8
}
156
; Unsigned <= on <8 x i16>: 'icmp ule' plus 'sext' of the i1 mask is checked
; to select cle_u.h on the two loaded vectors, in source operand order.
define void @cle_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: cle_u_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ule <8 x i16> %1, %2
  %4 = sext <8 x i1> %3 to <8 x i16>
  ; CHECK-DAG: cle_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size cle_u_v8i16
}
173
; Unsigned <= on <4 x i32>: 'icmp ule' plus 'sext' of the i1 mask is checked
; to select cle_u.w on the two loaded vectors, in source operand order.
define void @cle_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: cle_u_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ule <4 x i32> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: cle_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size cle_u_v4i32
}
190
; Unsigned <= on <2 x i64>: 'icmp ule' plus 'sext' of the i1 mask is checked
; to select cle_u.d on the two loaded vectors, in source operand order.
define void @cle_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: cle_u_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ule <2 x i64> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: cle_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size cle_u_v2i64
}
207
; Signed < on <16 x i8>: 'icmp slt' plus 'sext' of the i1 mask is checked to
; select clt_s.b on the two loaded vectors, in source operand order.
define void @clt_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: clt_s_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp slt <16 x i8> %1, %2
  %4 = sext <16 x i1> %3 to <16 x i8>
  ; CHECK-DAG: clt_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size clt_s_v16i8
}
224
; Signed < on <8 x i16>: 'icmp slt' plus 'sext' of the i1 mask is checked to
; select clt_s.h on the two loaded vectors, in source operand order.
define void @clt_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: clt_s_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp slt <8 x i16> %1, %2
  %4 = sext <8 x i1> %3 to <8 x i16>
  ; CHECK-DAG: clt_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size clt_s_v8i16
}
241
; Signed < on <4 x i32>: 'icmp slt' plus 'sext' of the i1 mask is checked to
; select clt_s.w on the two loaded vectors, in source operand order.
define void @clt_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: clt_s_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp slt <4 x i32> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: clt_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size clt_s_v4i32
}
258
; Signed < on <2 x i64>: 'icmp slt' plus 'sext' of the i1 mask is checked to
; select clt_s.d on the two loaded vectors, in source operand order.
define void @clt_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: clt_s_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp slt <2 x i64> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: clt_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size clt_s_v2i64
}
275
; Unsigned < on <16 x i8>: 'icmp ult' plus 'sext' of the i1 mask is checked
; to select clt_u.b on the two loaded vectors, in source operand order.
define void @clt_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: clt_u_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ult <16 x i8> %1, %2
  %4 = sext <16 x i1> %3 to <16 x i8>
  ; CHECK-DAG: clt_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size clt_u_v16i8
}
292
; Unsigned < on <8 x i16>: 'icmp ult' plus 'sext' of the i1 mask is checked
; to select clt_u.h on the two loaded vectors, in source operand order.
define void @clt_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: clt_u_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ult <8 x i16> %1, %2
  %4 = sext <8 x i1> %3 to <8 x i16>
  ; CHECK-DAG: clt_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size clt_u_v8i16
}
309
; Unsigned < on <4 x i32>: 'icmp ult' plus 'sext' of the i1 mask is checked
; to select clt_u.w on the two loaded vectors, in source operand order.
define void @clt_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: clt_u_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ult <4 x i32> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: clt_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size clt_u_v4i32
}
326
; Unsigned < on <2 x i64>: 'icmp ult' plus 'sext' of the i1 mask is checked
; to select clt_u.d on the two loaded vectors, in source operand order.
define void @clt_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: clt_u_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ult <2 x i64> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: clt_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size clt_u_v2i64
}
343
; There is no != comparison, but test it anyway since we've had legalizer
; issues in this area.
; For <16 x i8> the checks expect ceq.b followed by xori.b with 255, i.e. the
; equality mask with every bit inverted.
define void @cne_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: cne_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ne <16 x i8> %1, %2
  %4 = sext <16 x i1> %3 to <16 x i8>
  ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  ; CHECK-DAG: xori.b [[R3]], [[R3]], 255
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size cne_v16i8
}
362
; There is no != comparison, but test it anyway since we've had legalizer
; issues in this area.
; For <8 x i16> the checks expect ceq.h, then an all-ones vector built with
; ldi.b -1 and a xor.v that inverts the equality mask.
define void @cne_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: cne_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ne <8 x i16> %1, %2
  %4 = sext <8 x i1> %3 to <8 x i16>
  ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
  ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
  ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size cne_v8i16
}
384
; There is no != comparison, but test it anyway since we've had legalizer
; issues in this area.
; For <4 x i32> the checks expect ceq.w, then an all-ones vector built with
; ldi.b -1 and a xor.v that inverts the equality mask.
define void @cne_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: cne_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ne <4 x i32> %1, %2
  %4 = sext <4 x i1> %3 to <4 x i32>
  ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
  ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
  ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size cne_v4i32
}
406
; There is no != comparison, but test it anyway since we've had legalizer
; issues in this area.
; For <2 x i64> the checks expect ceq.d, then an all-ones vector built with
; ldi.b -1 and a xor.v that inverts the equality mask.
define void @cne_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: cne_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp ne <2 x i64> %1, %2
  %4 = sext <2 x i1> %3 to <2 x i64>
  ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but that's an optimisation issue
  ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
  ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size cne_v2i64
}
428
; Equality of <16 x i8> against a splat of 1: the checks expect the
; immediate form ceqi.b with operand 1 instead of a materialised vector.
define void @ceqi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: ceqi_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp eq <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = sext <16 x i1> %2 to <16 x i8>
  ; CHECK-DAG: ceqi.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ceqi_v16i8
}
443
; Equality of <8 x i16> against a splat of 1: the checks expect the
; immediate form ceqi.h with operand 1 instead of a materialised vector.
define void @ceqi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: ceqi_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp eq <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = sext <8 x i1> %2 to <8 x i16>
  ; CHECK-DAG: ceqi.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ceqi_v8i16
}
458
; Equality of <4 x i32> against a splat of 1: the checks expect the
; immediate form ceqi.w with operand 1 instead of a materialised vector.
define void @ceqi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: ceqi_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp eq <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = sext <4 x i1> %2 to <4 x i32>
  ; CHECK-DAG: ceqi.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ceqi_v4i32
}
473
; Equality of <2 x i64> against a splat of 1: the checks expect the
; immediate form ceqi.d with operand 1 instead of a materialised vector.
define void @ceqi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: ceqi_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
  %3 = sext <2 x i1> %2 to <2 x i64>
  ; CHECK-DAG: ceqi.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ceqi_v2i64
}
488
; Signed <= of <16 x i8> against a splat of 1: the checks expect the
; immediate form clei_s.b with operand 1.
define void @clei_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: clei_s_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp sle <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = sext <16 x i1> %2 to <16 x i8>
  ; CHECK-DAG: clei_s.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size clei_s_v16i8
}
503
; Signed <= of <8 x i16> against a splat of 1: the checks expect the
; immediate form clei_s.h with operand 1.
define void @clei_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: clei_s_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp sle <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = sext <8 x i1> %2 to <8 x i16>
  ; CHECK-DAG: clei_s.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size clei_s_v8i16
}
518
; Signed <= of <4 x i32> against a splat of 1: the checks expect the
; immediate form clei_s.w with operand 1.
define void @clei_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: clei_s_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp sle <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = sext <4 x i1> %2 to <4 x i32>
  ; CHECK-DAG: clei_s.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size clei_s_v4i32
}
533
; Signed <= of <2 x i64> against a splat of 1: the checks expect the
; immediate form clei_s.d with operand 1.
define void @clei_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: clei_s_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp sle <2 x i64> %1, <i64 1, i64 1>
  %3 = sext <2 x i1> %2 to <2 x i64>
  ; CHECK-DAG: clei_s.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size clei_s_v2i64
}
548
; Unsigned <= of <16 x i8> against a splat of 1: the checks expect the
; immediate form clei_u.b with operand 1.
define void @clei_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: clei_u_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp ule <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = sext <16 x i1> %2 to <16 x i8>
  ; CHECK-DAG: clei_u.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size clei_u_v16i8
}
563
; Unsigned <= of <8 x i16> against a splat of 1: the checks expect the
; immediate form clei_u.h with operand 1.
define void @clei_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: clei_u_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp ule <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = sext <8 x i1> %2 to <8 x i16>
  ; CHECK-DAG: clei_u.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size clei_u_v8i16
}
578
; Unsigned <= of <4 x i32> against a splat of 1: the checks expect the
; immediate form clei_u.w with operand 1.
define void @clei_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: clei_u_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp ule <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = sext <4 x i1> %2 to <4 x i32>
  ; CHECK-DAG: clei_u.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size clei_u_v4i32
}
593
; Unsigned <= of <2 x i64> against a splat of 1: the checks expect the
; immediate form clei_u.d with operand 1.
define void @clei_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: clei_u_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp ule <2 x i64> %1, <i64 1, i64 1>
  %3 = sext <2 x i1> %2 to <2 x i64>
  ; CHECK-DAG: clei_u.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size clei_u_v2i64
}
608
; Signed < of <16 x i8> against a splat of 1: the checks expect the
; immediate form clti_s.b with operand 1.
define void @clti_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: clti_s_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp slt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = sext <16 x i1> %2 to <16 x i8>
  ; CHECK-DAG: clti_s.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size clti_s_v16i8
}
623
; Signed < of <8 x i16> against a splat of 1: the checks expect the
; immediate form clti_s.h with operand 1.
define void @clti_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: clti_s_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp slt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = sext <8 x i1> %2 to <8 x i16>
  ; CHECK-DAG: clti_s.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size clti_s_v8i16
}
638
; Signed < of <4 x i32> against a splat of 1: the checks expect the
; immediate form clti_s.w with operand 1.
define void @clti_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: clti_s_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp slt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = sext <4 x i1> %2 to <4 x i32>
  ; CHECK-DAG: clti_s.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size clti_s_v4i32
}
653
; Signed < of <2 x i64> against a splat of 1: the checks expect the
; immediate form clti_s.d with operand 1.
define void @clti_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: clti_s_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp slt <2 x i64> %1, <i64 1, i64 1>
  %3 = sext <2 x i1> %2 to <2 x i64>
  ; CHECK-DAG: clti_s.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size clti_s_v2i64
}
668
; Unsigned < of <16 x i8> against a splat of 1: the checks expect the
; immediate form clti_u.b with operand 1.
define void @clti_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: clti_u_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp ult <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %3 = sext <16 x i1> %2 to <16 x i8>
  ; CHECK-DAG: clti_u.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size clti_u_v16i8
}
683
; Unsigned < of <8 x i16> against a splat of 1: the checks expect the
; immediate form clti_u.h with operand 1.
define void @clti_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: clti_u_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp ult <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = sext <8 x i1> %2 to <8 x i16>
  ; CHECK-DAG: clti_u.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size clti_u_v8i16
}
698
; Unsigned < of <4 x i32> against a splat of 1: the checks expect the
; immediate form clti_u.w with operand 1.
define void @clti_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: clti_u_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp ult <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = sext <4 x i1> %2 to <4 x i32>
  ; CHECK-DAG: clti_u.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size clti_u_v4i32
}
713
; Unsigned < of <2 x i64> against a splat of 1: the checks expect the
; immediate form clti_u.d with operand 1.
define void @clti_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: clti_u_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = icmp ult <2 x i64> %1, <i64 1, i64 1>
  %3 = sext <2 x i1> %2 to <2 x i64>
  ; CHECK-DAG: clti_u.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size clti_u_v2i64
}
728
; Signed > on <16 x i8> driving a select between %a and %c. The checks expect
; clt_s.b with swapped operands (a > b matched as b < a) and a bmnz.v whose
; destination is the register loaded from %c.
define void @bsel_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
                        <16 x i8>* %c) nounwind {
  ; CHECK-LABEL: bsel_s_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = load <16 x i8>* %c
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
  %4 = icmp sgt <16 x i8> %1, %2
  ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3
  ; bmnz.v is the same operation
  ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]]
  store <16 x i8> %5, <16 x i8>* %d
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_s_v16i8
}
750
; Signed > on <8 x i16> driving a select between %a and %c. The checks expect
; clt_s.h with swapped operands (a > b matched as b < a) and a bsel.v that
; overwrites the mask register, which is then stored.
define void @bsel_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
                        <8 x i16>* %c) nounwind {
  ; CHECK-LABEL: bsel_s_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = load <8 x i16>* %c
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
  %4 = icmp sgt <8 x i16> %1, %2
  ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
  store <8 x i16> %5, <8 x i16>* %d
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size bsel_s_v8i16
}
771
; Signed > on <4 x i32> driving a select between %a and %c. The checks expect
; clt_s.w with swapped operands (a > b matched as b < a) and a bsel.v that
; overwrites the mask register, which is then stored.
define void @bsel_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
                        <4 x i32>* %c) nounwind {
  ; CHECK-LABEL: bsel_s_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x i32>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = icmp sgt <4 x i32> %1, %2
  ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
  store <4 x i32> %5, <4 x i32>* %d
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size bsel_s_v4i32
}
792
; Signed > on <2 x i64> driving a select between %a and %c. The checks expect
; clt_s.d with swapped operands (a > b matched as b < a) and a bsel.v that
; overwrites the mask register, which is then stored.
define void @bsel_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
                        <2 x i64>* %c) nounwind {
  ; CHECK-LABEL: bsel_s_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x i64>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = icmp sgt <2 x i64> %1, %2
  ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
  store <2 x i64> %5, <2 x i64>* %d
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size bsel_s_v2i64
}
813
; Unsigned > on <16 x i8> driving a select between %a and %c. The checks
; expect clt_u.b with swapped operands (a > b matched as b < a) and a bmnz.v
; whose destination is the register loaded from %c.
define void @bsel_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
                        <16 x i8>* %c) nounwind {
  ; CHECK-LABEL: bsel_u_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = load <16 x i8>* %c
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
  %4 = icmp ugt <16 x i8> %1, %2
  ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3
  ; bmnz.v is the same operation
  ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]]
  store <16 x i8> %5, <16 x i8>* %d
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bsel_u_v16i8
}
835
; Unsigned > on <8 x i16> driving a select between %a and %c. The checks
; expect clt_u.h with swapped operands (a > b matched as b < a) and a bsel.v
; that overwrites the mask register, which is then stored.
define void @bsel_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
                        <8 x i16>* %c) nounwind {
  ; CHECK-LABEL: bsel_u_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = load <8 x i16>* %c
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
  %4 = icmp ugt <8 x i16> %1, %2
  ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
  store <8 x i16> %5, <8 x i16>* %d
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size bsel_u_v8i16
}
856
; Unsigned > on <4 x i32> driving a select between %a and %c. The checks
; expect clt_u.w with swapped operands (a > b matched as b < a) and a bsel.v
; that overwrites the mask register, which is then stored.
define void @bsel_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
                        <4 x i32>* %c) nounwind {
  ; CHECK-LABEL: bsel_u_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x i32>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = icmp ugt <4 x i32> %1, %2
  ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
  store <4 x i32> %5, <4 x i32>* %d
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size bsel_u_v4i32
}
877
; Unsigned > on <2 x i64> driving a select between %a and %c. The checks
; expect clt_u.d with swapped operands (a > b matched as b < a) and a bsel.v
; that overwrites the mask register, which is then stored.
define void @bsel_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
                        <2 x i64>* %c) nounwind {
  ; CHECK-LABEL: bsel_u_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x i64>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = icmp ugt <2 x i64> %1, %2
  ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
  store <2 x i64> %5, <2 x i64>* %d
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size bsel_u_v2i64
}
898
; Signed > on <16 x i8> driving a select between %a and a splat of 1. The
; checks expect clt_s.b with swapped operands followed by the immediate form
; bseli.b. Note that %c is declared but never read in this function.
define void @bseli_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
                        <16 x i8>* %c) nounwind {
  ; CHECK-LABEL: bseli_s_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp sgt <16 x i8> %1, %2
  ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: bseli.b [[R4]], [[R1]], 1
  store <16 x i8> %4, <16 x i8>* %d
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size bseli_s_v16i8
}
917
; Signed > on <8 x i16> driving a select between %a and a splat of 1. The
; checks expect clt_s.h with swapped operands, the splat built with ldi.h,
; and a register-form bsel.v. Note that %c is declared but never read.
define void @bseli_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
                        <8 x i16>* %c) nounwind {
  ; CHECK-LABEL: bseli_s_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp sgt <8 x i16> %1, %2
  ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
  store <8 x i16> %4, <8 x i16>* %d
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size bseli_s_v8i16
}
937
; Signed > on <4 x i32> driving a select between %a and a splat of 1. The
; checks expect clt_s.w with swapped operands, the splat built with ldi.w,
; and a register-form bsel.v. Note that %c is declared but never read.
define void @bseli_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
                        <4 x i32>* %c) nounwind {
  ; CHECK-LABEL: bseli_s_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp sgt <4 x i32> %1, %2
  ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
  store <4 x i32> %4, <4 x i32>* %d
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size bseli_s_v4i32
}
957
; Signed > on <2 x i64> driving a select between %a and a splat of 1. The
; checks expect clt_s.d with swapped operands, the splat built with ldi.d,
; and a register-form bsel.v. Note that %c is declared but never read.
define void @bseli_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
                        <2 x i64>* %c) nounwind {
  ; CHECK-LABEL: bseli_s_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp sgt <2 x i64> %1, %2
  ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
  store <2 x i64> %4, <2 x i64>* %d
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size bseli_s_v2i64
}
977
; Unsigned greater-than compare of %a against %b drives a select between %a's
; elements and a splat of 1. For v16i8 the expected sequence is clt_u.b (with
; swapped operands) followed by bseli.b with immediate 1. The %c argument is
; not referenced.
define void @bseli_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
                        <16 x i8>* %c) nounwind {
  ; CHECK: bseli_u_v16i8:

  %lhs = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[RHS:\$w[0-9]+]], 0($6)
  %gt = icmp ugt <16 x i8> %lhs, %rhs
  ; CHECK-DAG: clt_u.b [[MASK:\$w[0-9]+]], [[RHS]], [[LHS]]
  %sel = select <16 x i1> %gt, <16 x i8> %lhs, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: bseli.b [[MASK]], [[LHS]], 1
  store <16 x i8> %sel, <16 x i8>* %d
  ; CHECK-DAG: st.b [[MASK]], 0($4)

  ret void
  ; CHECK: .size bseli_u_v16i8
}
996
; Unsigned greater-than compare of %a against %b drives a select between %a's
; elements and a splat of 1. For v8i16 the expected sequence is clt_u.h (with
; swapped operands), ldi.h of 1 and bsel.v. The %c argument is not referenced.
define void @bseli_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
                        <8 x i16>* %c) nounwind {
  ; CHECK: bseli_u_v8i16:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[RHS:\$w[0-9]+]], 0($6)
  %gt = icmp ugt <8 x i16> %lhs, %rhs
  ; CHECK-DAG: clt_u.h [[MASK:\$w[0-9]+]], [[RHS]], [[LHS]]
  %sel = select <8 x i1> %gt, <8 x i16> %lhs, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: ldi.h [[ONE:\$w[0-9]+]], 1
  ; CHECK-DAG: bsel.v [[MASK]], [[LHS]], [[ONE]]
  store <8 x i16> %sel, <8 x i16>* %d
  ; CHECK-DAG: st.h [[MASK]], 0($4)

  ret void
  ; CHECK: .size bseli_u_v8i16
}
1016
; Unsigned greater-than compare of %a against %b drives a select between %a's
; elements and a splat of 1. For v4i32 the expected sequence is clt_u.w (with
; swapped operands), ldi.w of 1 and bsel.v. The %c argument is not referenced.
define void @bseli_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
                        <4 x i32>* %c) nounwind {
  ; CHECK: bseli_u_v4i32:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %gt = icmp ugt <4 x i32> %lhs, %rhs
  ; CHECK-DAG: clt_u.w [[MASK:\$w[0-9]+]], [[RHS]], [[LHS]]
  %sel = select <4 x i1> %gt, <4 x i32> %lhs, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: ldi.w [[ONE:\$w[0-9]+]], 1
  ; CHECK-DAG: bsel.v [[MASK]], [[LHS]], [[ONE]]
  store <4 x i32> %sel, <4 x i32>* %d
  ; CHECK-DAG: st.w [[MASK]], 0($4)

  ret void
  ; CHECK: .size bseli_u_v4i32
}
1036
; Unsigned greater-than compare of %a against %b drives a select between %a's
; elements and a splat of 1. For v2i64 the expected sequence is clt_u.d (with
; swapped operands), ldi.d of 1 and bsel.v. The %c argument is not referenced.
define void @bseli_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
                        <2 x i64>* %c) nounwind {
  ; CHECK: bseli_u_v2i64:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %gt = icmp ugt <2 x i64> %lhs, %rhs
  ; CHECK-DAG: clt_u.d [[MASK:\$w[0-9]+]], [[RHS]], [[LHS]]
  %sel = select <2 x i1> %gt, <2 x i64> %lhs, <2 x i64> <i64 1, i64 1>
  ; CHECK-DAG: ldi.d [[ONE:\$w[0-9]+]], 1
  ; CHECK-DAG: bsel.v [[MASK]], [[LHS]], [[ONE]]
  store <2 x i64> %sel, <2 x i64>* %d
  ; CHECK-DAG: st.d [[MASK]], 0($4)

  ret void
  ; CHECK: .size bseli_u_v2i64
}
1056
; select(a sgt b, a, b) on v16i8 loaded from %a and %b; a single max_s.b is
; expected, with the result stored through %c.
define void @max_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: max_s_v16i8:

  %lhs = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sgt <16 x i8> %lhs, %rhs
  %sel = select <16 x i1> %cmp, <16 x i8> %lhs, <16 x i8> %rhs
  ; CHECK-DAG: max_s.b [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size max_s_v16i8
}
1073
; select(a sgt b, a, b) on v8i16 loaded from %a and %b; a single max_s.h is
; expected, with the result stored through %c.
define void @max_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: max_s_v8i16:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sgt <8 x i16> %lhs, %rhs
  %sel = select <8 x i1> %cmp, <8 x i16> %lhs, <8 x i16> %rhs
  ; CHECK-DAG: max_s.h [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size max_s_v8i16
}
1090
; select(a sgt b, a, b) on v4i32 loaded from %a and %b; a single max_s.w is
; expected, with the result stored through %c.
define void @max_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: max_s_v4i32:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sgt <4 x i32> %lhs, %rhs
  %sel = select <4 x i1> %cmp, <4 x i32> %lhs, <4 x i32> %rhs
  ; CHECK-DAG: max_s.w [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size max_s_v4i32
}
1107
; select(a sgt b, a, b) on v2i64 loaded from %a and %b; a single max_s.d is
; expected, with the result stored through %c.
define void @max_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: max_s_v2i64:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sgt <2 x i64> %lhs, %rhs
  %sel = select <2 x i1> %cmp, <2 x i64> %lhs, <2 x i64> %rhs
  ; CHECK-DAG: max_s.d [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size max_s_v2i64
}
1124
; select(a ugt b, a, b) on v16i8 loaded from %a and %b; a single max_u.b is
; expected, with the result stored through %c.
define void @max_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: max_u_v16i8:

  %lhs = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ugt <16 x i8> %lhs, %rhs
  %sel = select <16 x i1> %cmp, <16 x i8> %lhs, <16 x i8> %rhs
  ; CHECK-DAG: max_u.b [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size max_u_v16i8
}
1141
; select(a ugt b, a, b) on v8i16 loaded from %a and %b; a single max_u.h is
; expected, with the result stored through %c.
define void @max_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: max_u_v8i16:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ugt <8 x i16> %lhs, %rhs
  %sel = select <8 x i1> %cmp, <8 x i16> %lhs, <8 x i16> %rhs
  ; CHECK-DAG: max_u.h [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size max_u_v8i16
}
1158
; select(a ugt b, a, b) on v4i32 loaded from %a and %b; a single max_u.w is
; expected, with the result stored through %c.
define void @max_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: max_u_v4i32:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ugt <4 x i32> %lhs, %rhs
  %sel = select <4 x i1> %cmp, <4 x i32> %lhs, <4 x i32> %rhs
  ; CHECK-DAG: max_u.w [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size max_u_v4i32
}
1175
; select(a ugt b, a, b) on v2i64 loaded from %a and %b; a single max_u.d is
; expected, with the result stored through %c.
define void @max_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: max_u_v2i64:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ugt <2 x i64> %lhs, %rhs
  %sel = select <2 x i1> %cmp, <2 x i64> %lhs, <2 x i64> %rhs
  ; CHECK-DAG: max_u.d [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size max_u_v2i64
}
1192
; select(a sge b, a, b) on v16i8: the greater-or-equal form of the signed max
; pattern, for which max_s.b is still the expected instruction.
define void @max_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: max_s_eq_v16i8:

  %lhs = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sge <16 x i8> %lhs, %rhs
  %sel = select <16 x i1> %cmp, <16 x i8> %lhs, <16 x i8> %rhs
  ; CHECK-DAG: max_s.b [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size max_s_eq_v16i8
}
1209
; select(a sge b, a, b) on v8i16: the greater-or-equal form of the signed max
; pattern, for which max_s.h is still the expected instruction.
define void @max_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: max_s_eq_v8i16:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sge <8 x i16> %lhs, %rhs
  %sel = select <8 x i1> %cmp, <8 x i16> %lhs, <8 x i16> %rhs
  ; CHECK-DAG: max_s.h [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size max_s_eq_v8i16
}
1226
; select(a sge b, a, b) on v4i32: the greater-or-equal form of the signed max
; pattern, for which max_s.w is still the expected instruction.
define void @max_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: max_s_eq_v4i32:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sge <4 x i32> %lhs, %rhs
  %sel = select <4 x i1> %cmp, <4 x i32> %lhs, <4 x i32> %rhs
  ; CHECK-DAG: max_s.w [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size max_s_eq_v4i32
}
1243
; select(a sge b, a, b) on v2i64: the greater-or-equal form of the signed max
; pattern, for which max_s.d is still the expected instruction.
define void @max_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: max_s_eq_v2i64:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sge <2 x i64> %lhs, %rhs
  %sel = select <2 x i1> %cmp, <2 x i64> %lhs, <2 x i64> %rhs
  ; CHECK-DAG: max_s.d [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size max_s_eq_v2i64
}
1260
; select(a uge b, a, b) on v16i8: the greater-or-equal form of the unsigned
; max pattern, for which max_u.b is still the expected instruction.
define void @max_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: max_u_eq_v16i8:

  %lhs = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp uge <16 x i8> %lhs, %rhs
  %sel = select <16 x i1> %cmp, <16 x i8> %lhs, <16 x i8> %rhs
  ; CHECK-DAG: max_u.b [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size max_u_eq_v16i8
}
1277
; select(a uge b, a, b) on v8i16: the greater-or-equal form of the unsigned
; max pattern, for which max_u.h is still the expected instruction.
define void @max_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: max_u_eq_v8i16:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp uge <8 x i16> %lhs, %rhs
  %sel = select <8 x i1> %cmp, <8 x i16> %lhs, <8 x i16> %rhs
  ; CHECK-DAG: max_u.h [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size max_u_eq_v8i16
}
1294
; select(a uge b, a, b) on v4i32: the greater-or-equal form of the unsigned
; max pattern, for which max_u.w is still the expected instruction.
define void @max_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: max_u_eq_v4i32:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp uge <4 x i32> %lhs, %rhs
  %sel = select <4 x i1> %cmp, <4 x i32> %lhs, <4 x i32> %rhs
  ; CHECK-DAG: max_u.w [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size max_u_eq_v4i32
}
1311
; select(a uge b, a, b) on v2i64: the greater-or-equal form of the unsigned
; max pattern, for which max_u.d is still the expected instruction.
define void @max_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: max_u_eq_v2i64:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp uge <2 x i64> %lhs, %rhs
  %sel = select <2 x i1> %cmp, <2 x i64> %lhs, <2 x i64> %rhs
  ; CHECK-DAG: max_u.d [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size max_u_eq_v2i64
}
1328
; select(a sgt splat(1), a, splat(1)) on v16i8 loaded from %a; the immediate
; form maxi_s.b with operand 1 is expected.
define void @maxi_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: maxi_s_v16i8:

  %src = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp sgt <16 x i8> %src, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sel = select <16 x i1> %cmp, <16 x i8> %src, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: maxi_s.b [[DST:\$w[0-9]+]], [[SRC]], 1
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_s_v16i8
}
1343
; select(a sgt splat(1), a, splat(1)) on v8i16 loaded from %a; the immediate
; form maxi_s.h with operand 1 is expected.
define void @maxi_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: maxi_s_v8i16:

  %src = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp sgt <8 x i16> %src, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sel = select <8 x i1> %cmp, <8 x i16> %src, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: maxi_s.h [[DST:\$w[0-9]+]], [[SRC]], 1
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_s_v8i16
}
1358
; select(a sgt splat(1), a, splat(1)) on v4i32 loaded from %a; the immediate
; form maxi_s.w with operand 1 is expected.
define void @maxi_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: maxi_s_v4i32:

  %src = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp sgt <4 x i32> %src, <i32 1, i32 1, i32 1, i32 1>
  %sel = select <4 x i1> %cmp, <4 x i32> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: maxi_s.w [[DST:\$w[0-9]+]], [[SRC]], 1
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_s_v4i32
}
1373
; select(a sgt splat(1), a, splat(1)) on v2i64 loaded from %a; the immediate
; form maxi_s.d with operand 1 is expected.
define void @maxi_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: maxi_s_v2i64:

  %src = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp sgt <2 x i64> %src, <i64 1, i64 1>
  %sel = select <2 x i1> %cmp, <2 x i64> %src, <2 x i64> <i64 1, i64 1>
  ; CHECK-DAG: maxi_s.d [[DST:\$w[0-9]+]], [[SRC]], 1
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_s_v2i64
}
1388
; select(a ugt splat(1), a, splat(1)) on v16i8 loaded from %a; the immediate
; form maxi_u.b with operand 1 is expected.
define void @maxi_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: maxi_u_v16i8:

  %src = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp ugt <16 x i8> %src, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sel = select <16 x i1> %cmp, <16 x i8> %src, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: maxi_u.b [[DST:\$w[0-9]+]], [[SRC]], 1
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_u_v16i8
}
1403
; select(a ugt splat(1), a, splat(1)) on v8i16 loaded from %a; the immediate
; form maxi_u.h with operand 1 is expected.
define void @maxi_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: maxi_u_v8i16:

  %src = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp ugt <8 x i16> %src, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sel = select <8 x i1> %cmp, <8 x i16> %src, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: maxi_u.h [[DST:\$w[0-9]+]], [[SRC]], 1
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_u_v8i16
}
1418
; select(a ugt splat(1), a, splat(1)) on v4i32 loaded from %a; the immediate
; form maxi_u.w with operand 1 is expected.
define void @maxi_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: maxi_u_v4i32:

  %src = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp ugt <4 x i32> %src, <i32 1, i32 1, i32 1, i32 1>
  %sel = select <4 x i1> %cmp, <4 x i32> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: maxi_u.w [[DST:\$w[0-9]+]], [[SRC]], 1
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_u_v4i32
}
1433
; select(a ugt splat(1), a, splat(1)) on v2i64 loaded from %a; the immediate
; form maxi_u.d with operand 1 is expected.
define void @maxi_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: maxi_u_v2i64:

  %src = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp ugt <2 x i64> %src, <i64 1, i64 1>
  %sel = select <2 x i1> %cmp, <2 x i64> %src, <2 x i64> <i64 1, i64 1>
  ; CHECK-DAG: maxi_u.d [[DST:\$w[0-9]+]], [[SRC]], 1
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_u_v2i64
}
1448
; select(a sge splat(1), a, splat(1)) on v16i8: the greater-or-equal form of
; the signed max-with-immediate pattern; maxi_s.b with operand 1 is expected.
define void @maxi_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: maxi_s_eq_v16i8:

  %src = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp sge <16 x i8> %src, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sel = select <16 x i1> %cmp, <16 x i8> %src, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: maxi_s.b [[DST:\$w[0-9]+]], [[SRC]], 1
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_s_eq_v16i8
}
1463
; select(a sge splat(1), a, splat(1)) on v8i16: the greater-or-equal form of
; the signed max-with-immediate pattern; maxi_s.h with operand 1 is expected.
define void @maxi_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: maxi_s_eq_v8i16:

  %src = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp sge <8 x i16> %src, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sel = select <8 x i1> %cmp, <8 x i16> %src, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: maxi_s.h [[DST:\$w[0-9]+]], [[SRC]], 1
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_s_eq_v8i16
}
1478
; select(a sge splat(1), a, splat(1)) on v4i32: the greater-or-equal form of
; the signed max-with-immediate pattern; maxi_s.w with operand 1 is expected.
define void @maxi_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: maxi_s_eq_v4i32:

  %src = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp sge <4 x i32> %src, <i32 1, i32 1, i32 1, i32 1>
  %sel = select <4 x i1> %cmp, <4 x i32> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: maxi_s.w [[DST:\$w[0-9]+]], [[SRC]], 1
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_s_eq_v4i32
}
1493
; select(a sge splat(1), a, splat(1)) on v2i64: the greater-or-equal form of
; the signed max-with-immediate pattern; maxi_s.d with operand 1 is expected.
define void @maxi_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: maxi_s_eq_v2i64:

  %src = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp sge <2 x i64> %src, <i64 1, i64 1>
  %sel = select <2 x i1> %cmp, <2 x i64> %src, <2 x i64> <i64 1, i64 1>
  ; CHECK-DAG: maxi_s.d [[DST:\$w[0-9]+]], [[SRC]], 1
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_s_eq_v2i64
}
1508
; select(a uge splat(1), a, splat(1)) on v16i8: the greater-or-equal form of
; the unsigned max-with-immediate pattern; maxi_u.b with operand 1 is expected.
define void @maxi_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: maxi_u_eq_v16i8:

  %src = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp uge <16 x i8> %src, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sel = select <16 x i1> %cmp, <16 x i8> %src, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: maxi_u.b [[DST:\$w[0-9]+]], [[SRC]], 1
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_u_eq_v16i8
}
1523
; select(a uge splat(1), a, splat(1)) on v8i16: the greater-or-equal form of
; the unsigned max-with-immediate pattern; maxi_u.h with operand 1 is expected.
define void @maxi_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: maxi_u_eq_v8i16:

  %src = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp uge <8 x i16> %src, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sel = select <8 x i1> %cmp, <8 x i16> %src, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: maxi_u.h [[DST:\$w[0-9]+]], [[SRC]], 1
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_u_eq_v8i16
}
1538
; select(a uge splat(1), a, splat(1)) on v4i32: the greater-or-equal form of
; the unsigned max-with-immediate pattern; maxi_u.w with operand 1 is expected.
define void @maxi_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: maxi_u_eq_v4i32:

  %src = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp uge <4 x i32> %src, <i32 1, i32 1, i32 1, i32 1>
  %sel = select <4 x i1> %cmp, <4 x i32> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: maxi_u.w [[DST:\$w[0-9]+]], [[SRC]], 1
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_u_eq_v4i32
}
1553
; select(a uge splat(1), a, splat(1)) on v2i64: the greater-or-equal form of
; the unsigned max-with-immediate pattern; maxi_u.d with operand 1 is expected.
define void @maxi_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: maxi_u_eq_v2i64:

  %src = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp uge <2 x i64> %src, <i64 1, i64 1>
  %sel = select <2 x i1> %cmp, <2 x i64> %src, <2 x i64> <i64 1, i64 1>
  ; CHECK-DAG: maxi_u.d [[DST:\$w[0-9]+]], [[SRC]], 1
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size maxi_u_eq_v2i64
}
1568
; select(a slt b, a, b) on v16i8 loaded from %a and %b; a single min_s.b is
; expected, with the result stored through %c.
; The compare must be the strict signed less-than: the less-or-equal form is
; exercised separately by min_s_eq_v16i8, and the v8i16/v4i32/v2i64 variants of
; this test all use slt.
define void @min_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: min_s_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = icmp slt <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ; CHECK-DAG: min_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size min_s_v16i8
}
1585
; select(a slt b, a, b) on v8i16 loaded from %a and %b; a single min_s.h is
; expected, with the result stored through %c.
define void @min_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: min_s_v8i16:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp slt <8 x i16> %lhs, %rhs
  %sel = select <8 x i1> %cmp, <8 x i16> %lhs, <8 x i16> %rhs
  ; CHECK-DAG: min_s.h [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size min_s_v8i16
}
1602
; select(a slt b, a, b) on v4i32 loaded from %a and %b; a single min_s.w is
; expected, with the result stored through %c.
define void @min_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: min_s_v4i32:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp slt <4 x i32> %lhs, %rhs
  %sel = select <4 x i1> %cmp, <4 x i32> %lhs, <4 x i32> %rhs
  ; CHECK-DAG: min_s.w [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size min_s_v4i32
}
1619
; select(a slt b, a, b) on v2i64 loaded from %a and %b; a single min_s.d is
; expected, with the result stored through %c.
define void @min_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: min_s_v2i64:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp slt <2 x i64> %lhs, %rhs
  %sel = select <2 x i1> %cmp, <2 x i64> %lhs, <2 x i64> %rhs
  ; CHECK-DAG: min_s.d [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size min_s_v2i64
}
1636
; select(a ult b, a, b) on v16i8 loaded from %a and %b; a single min_u.b is
; expected, with the result stored through %c.
define void @min_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: min_u_v16i8:

  %lhs = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ult <16 x i8> %lhs, %rhs
  %sel = select <16 x i1> %cmp, <16 x i8> %lhs, <16 x i8> %rhs
  ; CHECK-DAG: min_u.b [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size min_u_v16i8
}
1653
; select(a ult b, a, b) on v8i16 loaded from %a and %b; a single min_u.h is
; expected, with the result stored through %c.
define void @min_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: min_u_v8i16:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ult <8 x i16> %lhs, %rhs
  %sel = select <8 x i1> %cmp, <8 x i16> %lhs, <8 x i16> %rhs
  ; CHECK-DAG: min_u.h [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size min_u_v8i16
}
1670
; select(a ult b, a, b) on v4i32 loaded from %a and %b; a single min_u.w is
; expected, with the result stored through %c.
define void @min_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: min_u_v4i32:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ult <4 x i32> %lhs, %rhs
  %sel = select <4 x i1> %cmp, <4 x i32> %lhs, <4 x i32> %rhs
  ; CHECK-DAG: min_u.w [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size min_u_v4i32
}
1687
; select(a ult b, a, b) on v2i64 loaded from %a and %b; a single min_u.d is
; expected, with the result stored through %c.
define void @min_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: min_u_v2i64:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ult <2 x i64> %lhs, %rhs
  %sel = select <2 x i1> %cmp, <2 x i64> %lhs, <2 x i64> %rhs
  ; CHECK-DAG: min_u.d [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size min_u_v2i64
}
1704
; select(a sle b, a, b) on v16i8: the less-or-equal form of the signed min
; pattern, for which min_s.b is still the expected instruction.
define void @min_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: min_s_eq_v16i8:

  %lhs = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sle <16 x i8> %lhs, %rhs
  %sel = select <16 x i1> %cmp, <16 x i8> %lhs, <16 x i8> %rhs
  ; CHECK-DAG: min_s.b [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size min_s_eq_v16i8
}
1721
; select(a sle b, a, b) on v8i16: the less-or-equal form of the signed min
; pattern, for which min_s.h is still the expected instruction.
define void @min_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: min_s_eq_v8i16:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sle <8 x i16> %lhs, %rhs
  %sel = select <8 x i1> %cmp, <8 x i16> %lhs, <8 x i16> %rhs
  ; CHECK-DAG: min_s.h [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size min_s_eq_v8i16
}
1738
; select(a sle b, a, b) on v4i32: the less-or-equal form of the signed min
; pattern, for which min_s.w is still the expected instruction.
define void @min_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: min_s_eq_v4i32:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sle <4 x i32> %lhs, %rhs
  %sel = select <4 x i1> %cmp, <4 x i32> %lhs, <4 x i32> %rhs
  ; CHECK-DAG: min_s.w [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size min_s_eq_v4i32
}
1755
; select(a sle b, a, b) on v2i64: the less-or-equal form of the signed min
; pattern, for which min_s.d is still the expected instruction.
define void @min_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: min_s_eq_v2i64:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp sle <2 x i64> %lhs, %rhs
  %sel = select <2 x i1> %cmp, <2 x i64> %lhs, <2 x i64> %rhs
  ; CHECK-DAG: min_s.d [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size min_s_eq_v2i64
}
1772
; select(a ule b, a, b) on v16i8: the less-or-equal form of the unsigned min
; pattern, for which min_u.b is still the expected instruction.
define void @min_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: min_u_eq_v16i8:

  %lhs = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ule <16 x i8> %lhs, %rhs
  %sel = select <16 x i1> %cmp, <16 x i8> %lhs, <16 x i8> %rhs
  ; CHECK-DAG: min_u.b [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size min_u_eq_v16i8
}
1789
; select(a ule b, a, b) on v8i16: the less-or-equal form of the unsigned min
; pattern, for which min_u.h is still the expected instruction.
define void @min_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: min_u_eq_v8i16:

  %lhs = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ule <8 x i16> %lhs, %rhs
  %sel = select <8 x i1> %cmp, <8 x i16> %lhs, <8 x i16> %rhs
  ; CHECK-DAG: min_u.h [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size min_u_eq_v8i16
}
1806
; select(a ule b, a, b) on v4i32: the less-or-equal form of the unsigned min
; pattern, for which min_u.w is still the expected instruction.
define void @min_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: min_u_eq_v4i32:

  %lhs = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ule <4 x i32> %lhs, %rhs
  %sel = select <4 x i1> %cmp, <4 x i32> %lhs, <4 x i32> %rhs
  ; CHECK-DAG: min_u.w [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x i32> %sel, <4 x i32>* %c
  ; CHECK-DAG: st.w [[DST]], 0($4)

  ret void
  ; CHECK: .size min_u_eq_v4i32
}
1823
; select(a ule b, a, b) on v2i64: the less-or-equal form of the unsigned min
; pattern, for which min_u.d is still the expected instruction.
define void @min_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: min_u_eq_v2i64:

  %lhs = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %cmp = icmp ule <2 x i64> %lhs, %rhs
  %sel = select <2 x i1> %cmp, <2 x i64> %lhs, <2 x i64> %rhs
  ; CHECK-DAG: min_u.d [[DST:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x i64> %sel, <2 x i64>* %c
  ; CHECK-DAG: st.d [[DST]], 0($4)

  ret void
  ; CHECK: .size min_u_eq_v2i64
}
1840
; select(a slt splat(1), a, splat(1)) on v16i8 loaded from %a; the immediate
; form mini_s.b with operand 1 is expected.
define void @mini_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: mini_s_v16i8:

  %src = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp slt <16 x i8> %src, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sel = select <16 x i1> %cmp, <16 x i8> %src, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: mini_s.b [[DST:\$w[0-9]+]], [[SRC]], 1
  store <16 x i8> %sel, <16 x i8>* %c
  ; CHECK-DAG: st.b [[DST]], 0($4)

  ret void
  ; CHECK: .size mini_s_v16i8
}
1855
; select(a slt splat(1), a, splat(1)) on v8i16 loaded from %a; the immediate
; form mini_s.h with operand 1 is expected.
define void @mini_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: mini_s_v8i16:

  %src = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[SRC:\$w[0-9]+]], 0($5)
  %cmp = icmp slt <8 x i16> %src, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sel = select <8 x i1> %cmp, <8 x i16> %src, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: mini_s.h [[DST:\$w[0-9]+]], [[SRC]], 1
  store <8 x i16> %sel, <8 x i16>* %c
  ; CHECK-DAG: st.h [[DST]], 0($4)

  ret void
  ; CHECK: .size mini_s_v8i16
}
1870
; Signed "less than" compare-and-select of a <4 x i32> vector loaded from %a
; against a constant vector of all ones; the result is stored to %c. The
; patterns below expect mini_s.w with immediate 1.
define void @mini_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: mini_s_v4i32:

  %src = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %src_slt_one = icmp slt <4 x i32> %src, <i32 1, i32 1, i32 1, i32 1>
  %smin = select <4 x i1> %src_slt_one, <4 x i32> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: mini_s.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %smin, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_s_v4i32
}
1885
; Signed "less than" compare-and-select of a <2 x i64> vector loaded from %a
; against a constant vector of all ones; the result is stored to %c. The
; patterns below expect mini_s.d with immediate 1.
define void @mini_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: mini_s_v2i64:

  %src = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %src_slt_one = icmp slt <2 x i64> %src, <i64 1, i64 1>
  %smin = select <2 x i1> %src_slt_one, <2 x i64> %src, <2 x i64> <i64 1, i64 1>
  ; CHECK-DAG: mini_s.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %smin, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_s_v2i64
}
1900
; Unsigned "less than" compare-and-select of a <16 x i8> vector loaded from
; %a against a constant vector of all ones; the result is stored to %c. The
; patterns below expect mini_u.b with immediate 1.
define void @mini_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: mini_u_v16i8:

  %src = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %src_ult_one = icmp ult <16 x i8> %src, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %umin = select <16 x i1> %src_ult_one, <16 x i8> %src, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: mini_u.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %umin, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_u_v16i8
}
1915
; Unsigned "less than" compare-and-select of a <8 x i16> vector loaded from
; %a against a constant vector of all ones; the result is stored to %c. The
; patterns below expect mini_u.h with immediate 1.
define void @mini_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: mini_u_v8i16:

  %src = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %src_ult_one = icmp ult <8 x i16> %src, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %umin = select <8 x i1> %src_ult_one, <8 x i16> %src, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: mini_u.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %umin, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_u_v8i16
}
1930
; Unsigned "less than" compare-and-select of a <4 x i32> vector loaded from
; %a against a constant vector of all ones; the result is stored to %c. The
; patterns below expect mini_u.w with immediate 1.
define void @mini_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: mini_u_v4i32:

  %src = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %src_ult_one = icmp ult <4 x i32> %src, <i32 1, i32 1, i32 1, i32 1>
  %umin = select <4 x i1> %src_ult_one, <4 x i32> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: mini_u.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %umin, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_u_v4i32
}
1945
; Unsigned "less than" compare-and-select of a <2 x i64> vector loaded from
; %a against a constant vector of all ones; the result is stored to %c. The
; patterns below expect mini_u.d with immediate 1.
define void @mini_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: mini_u_v2i64:

  %src = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %src_ult_one = icmp ult <2 x i64> %src, <i64 1, i64 1>
  %umin = select <2 x i1> %src_ult_one, <2 x i64> %src, <2 x i64> <i64 1, i64 1>
  ; CHECK-DAG: mini_u.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %umin, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_u_v2i64
}
1960
; Signed "less or equal" compare-and-select of a <16 x i8> vector loaded from
; %a against a constant vector of all ones; the result is stored to %c. The
; patterns below expect mini_s.b with immediate 1.
define void @mini_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: mini_s_eq_v16i8:

  %src = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %src_sle_one = icmp sle <16 x i8> %src, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %smin = select <16 x i1> %src_sle_one, <16 x i8> %src, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: mini_s.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %smin, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_s_eq_v16i8
}
1975
; Signed "less or equal" compare-and-select of a <8 x i16> vector loaded from
; %a against a constant vector of all ones; the result is stored to %c. The
; patterns below expect mini_s.h with immediate 1.
define void @mini_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: mini_s_eq_v8i16:

  %src = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %src_sle_one = icmp sle <8 x i16> %src, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %smin = select <8 x i1> %src_sle_one, <8 x i16> %src, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: mini_s.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %smin, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_s_eq_v8i16
}
1990
; Signed "less or equal" compare-and-select of a <4 x i32> vector loaded from
; %a against a constant vector of all ones; the result is stored to %c. The
; patterns below expect mini_s.w with immediate 1.
define void @mini_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: mini_s_eq_v4i32:

  %src = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %src_sle_one = icmp sle <4 x i32> %src, <i32 1, i32 1, i32 1, i32 1>
  %smin = select <4 x i1> %src_sle_one, <4 x i32> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: mini_s.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %smin, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_s_eq_v4i32
}
2005
; Signed "less or equal" compare-and-select of a <2 x i64> vector loaded from
; %a against a constant vector of all ones; the result is stored to %c. The
; patterns below expect mini_s.d with immediate 1.
define void @mini_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: mini_s_eq_v2i64:

  %src = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %src_sle_one = icmp sle <2 x i64> %src, <i64 1, i64 1>
  %smin = select <2 x i1> %src_sle_one, <2 x i64> %src, <2 x i64> <i64 1, i64 1>
  ; CHECK-DAG: mini_s.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %smin, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_s_eq_v2i64
}
2020
; Unsigned "less or equal" compare-and-select of a <16 x i8> vector loaded
; from %a against a constant vector of all ones; the result is stored to %c.
; The patterns below expect mini_u.b with immediate 1.
define void @mini_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: mini_u_eq_v16i8:

  %src = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %src_ule_one = icmp ule <16 x i8> %src, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %umin = select <16 x i1> %src_ule_one, <16 x i8> %src, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: mini_u.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %umin, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_u_eq_v16i8
}
2035
; Unsigned "less or equal" compare-and-select of a <8 x i16> vector loaded
; from %a against a constant vector of all ones; the result is stored to %c.
; The patterns below expect mini_u.h with immediate 1.
define void @mini_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: mini_u_eq_v8i16:

  %src = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %src_ule_one = icmp ule <8 x i16> %src, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %umin = select <8 x i1> %src_ule_one, <8 x i16> %src, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: mini_u.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %umin, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_u_eq_v8i16
}
2050
; Unsigned "less or equal" compare-and-select of a <4 x i32> vector loaded
; from %a against a constant vector of all ones; the result is stored to %c.
; The patterns below expect mini_u.w with immediate 1.
define void @mini_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: mini_u_eq_v4i32:

  %src = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %src_ule_one = icmp ule <4 x i32> %src, <i32 1, i32 1, i32 1, i32 1>
  %umin = select <4 x i1> %src_ule_one, <4 x i32> %src, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: mini_u.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %umin, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_u_eq_v4i32
}
2065
; Unsigned "less or equal" compare-and-select of a <2 x i64> vector loaded
; from %a against a constant vector of all ones; the result is stored to %c.
; The patterns below expect mini_u.d with immediate 1.
define void @mini_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: mini_u_eq_v2i64:

  %src = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %src_ule_one = icmp ule <2 x i64> %src, <i64 1, i64 1>
  %umin = select <2 x i1> %src_ule_one, <2 x i64> %src, <2 x i64> <i64 1, i64 1>
  ; CHECK-DAG: mini_u.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %umin, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mini_u_eq_v2i64
}
2080