1; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
2; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
3; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
4; RUN: llc -march=x86-64 -mcpu=core-avx2 -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2
5; RUN: llc -march=x86-64 -mcpu=knl < %s | FileCheck %s  -check-prefix=AVX2 -check-prefix=AVX512F
6; RUN: llc -march=x86-64 -mcpu=skx < %s | FileCheck %s  -check-prefix=AVX512BW -check-prefix=AVX512VL -check-prefix=AVX512F
7
; Signed minimum over <16 x i8>, written as select(icmp slt a, b), a, b.
; Every pass loads 16 bytes from %a and from %b and overwrites that slice of
; %a with the selected lanes; the counter steps by 16 from 0 up to 16384.
define void @test1(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i8* %a, i64 %iv
  %a.vp = bitcast i8* %a.elt to <16 x i8>*
  %b.elt = getelementptr inbounds i8* %b, i64 %iv
  %b.vp = bitcast i8* %b.elt to <16 x i8>*
  %a.vec = load <16 x i8>* %a.vp, align 2
  %b.vec = load <16 x i8>* %b.vp, align 2
  %lt = icmp slt <16 x i8> %a.vec, %b.vec
  %min = select <16 x i1> %lt, <16 x i8> %a.vec, <16 x i8> %b.vec
  store <16 x i8> %min, <16 x i8>* %a.vp, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test1:
; SSE4: pminsb

; AVX1-LABEL: test1:
; AVX1: vpminsb

; AVX2-LABEL: test1:
; AVX2: vpminsb

; AVX512VL-LABEL: test1:
; AVX512VL: vpminsb
}
42
; Signed minimum over <16 x i8>, written as select(icmp sle a, b), a, b.
; Every pass loads 16 bytes from %a and from %b and overwrites that slice of
; %a with the selected lanes; the counter steps by 16 from 0 up to 16384.
define void @test2(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i8* %a, i64 %iv
  %a.vp = bitcast i8* %a.elt to <16 x i8>*
  %b.elt = getelementptr inbounds i8* %b, i64 %iv
  %b.vp = bitcast i8* %b.elt to <16 x i8>*
  %a.vec = load <16 x i8>* %a.vp, align 2
  %b.vec = load <16 x i8>* %b.vp, align 2
  %le = icmp sle <16 x i8> %a.vec, %b.vec
  %min = select <16 x i1> %le, <16 x i8> %a.vec, <16 x i8> %b.vec
  store <16 x i8> %min, <16 x i8>* %a.vp, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test2:
; SSE4: pminsb

; AVX1-LABEL: test2:
; AVX1: vpminsb

; AVX2-LABEL: test2:
; AVX2: vpminsb

; AVX512VL-LABEL: test2:
; AVX512VL: vpminsb
}
77
; Signed maximum over <16 x i8>, written as select(icmp sgt a, b), a, b.
; Every pass loads 16 bytes from %a and from %b and overwrites that slice of
; %a with the selected lanes; the counter steps by 16 from 0 up to 16384.
define void @test3(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i8* %a, i64 %iv
  %a.vp = bitcast i8* %a.elt to <16 x i8>*
  %b.elt = getelementptr inbounds i8* %b, i64 %iv
  %b.vp = bitcast i8* %b.elt to <16 x i8>*
  %a.vec = load <16 x i8>* %a.vp, align 2
  %b.vec = load <16 x i8>* %b.vp, align 2
  %gt = icmp sgt <16 x i8> %a.vec, %b.vec
  %max = select <16 x i1> %gt, <16 x i8> %a.vec, <16 x i8> %b.vec
  store <16 x i8> %max, <16 x i8>* %a.vp, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test3:
; SSE4: pmaxsb

; AVX1-LABEL: test3:
; AVX1: vpmaxsb

; AVX2-LABEL: test3:
; AVX2: vpmaxsb

; AVX512VL-LABEL: test3:
; AVX512VL: vpmaxsb
}
112
; Signed maximum over <16 x i8>, written as select(icmp sge a, b), a, b.
; Every pass loads 16 bytes from %a and from %b and overwrites that slice of
; %a with the selected lanes; the counter steps by 16 from 0 up to 16384.
define void @test4(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i8* %a, i64 %iv
  %a.vp = bitcast i8* %a.elt to <16 x i8>*
  %b.elt = getelementptr inbounds i8* %b, i64 %iv
  %b.vp = bitcast i8* %b.elt to <16 x i8>*
  %a.vec = load <16 x i8>* %a.vp, align 2
  %b.vec = load <16 x i8>* %b.vp, align 2
  %ge = icmp sge <16 x i8> %a.vec, %b.vec
  %max = select <16 x i1> %ge, <16 x i8> %a.vec, <16 x i8> %b.vec
  store <16 x i8> %max, <16 x i8>* %a.vp, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test4:
; SSE4: pmaxsb

; AVX1-LABEL: test4:
; AVX1: vpmaxsb

; AVX2-LABEL: test4:
; AVX2: vpmaxsb

; AVX512VL-LABEL: test4:
; AVX512VL: vpmaxsb
}
147
; Unsigned minimum over <16 x i8>, written as select(icmp ult a, b), a, b.
; Every pass loads 16 bytes from %a and from %b and overwrites that slice of
; %a with the selected lanes; the counter steps by 16 from 0 up to 16384.
define void @test5(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i8* %a, i64 %iv
  %a.vp = bitcast i8* %a.elt to <16 x i8>*
  %b.elt = getelementptr inbounds i8* %b, i64 %iv
  %b.vp = bitcast i8* %b.elt to <16 x i8>*
  %a.vec = load <16 x i8>* %a.vp, align 2
  %b.vec = load <16 x i8>* %b.vp, align 2
  %lt = icmp ult <16 x i8> %a.vec, %b.vec
  %min = select <16 x i1> %lt, <16 x i8> %a.vec, <16 x i8> %b.vec
  store <16 x i8> %min, <16 x i8>* %a.vp, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test5:
; SSE2: pminub

; AVX1-LABEL: test5:
; AVX1: vpminub

; AVX2-LABEL: test5:
; AVX2: vpminub

; AVX512VL-LABEL: test5:
; AVX512VL: vpminub
}
182
; Unsigned minimum over <16 x i8>, written as select(icmp ule a, b), a, b.
; Every pass loads 16 bytes from %a and from %b and overwrites that slice of
; %a with the selected lanes; the counter steps by 16 from 0 up to 16384.
define void @test6(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i8* %a, i64 %iv
  %a.vp = bitcast i8* %a.elt to <16 x i8>*
  %b.elt = getelementptr inbounds i8* %b, i64 %iv
  %b.vp = bitcast i8* %b.elt to <16 x i8>*
  %a.vec = load <16 x i8>* %a.vp, align 2
  %b.vec = load <16 x i8>* %b.vp, align 2
  %le = icmp ule <16 x i8> %a.vec, %b.vec
  %min = select <16 x i1> %le, <16 x i8> %a.vec, <16 x i8> %b.vec
  store <16 x i8> %min, <16 x i8>* %a.vp, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test6:
; SSE2: pminub

; AVX1-LABEL: test6:
; AVX1: vpminub

; AVX2-LABEL: test6:
; AVX2: vpminub

; AVX512VL-LABEL: test6:
; AVX512VL: vpminub
}
217
; Unsigned maximum over <16 x i8>, written as select(icmp ugt a, b), a, b.
; Every pass loads 16 bytes from %a and from %b and overwrites that slice of
; %a with the selected lanes; the counter steps by 16 from 0 up to 16384.
define void @test7(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i8* %a, i64 %iv
  %a.vp = bitcast i8* %a.elt to <16 x i8>*
  %b.elt = getelementptr inbounds i8* %b, i64 %iv
  %b.vp = bitcast i8* %b.elt to <16 x i8>*
  %a.vec = load <16 x i8>* %a.vp, align 2
  %b.vec = load <16 x i8>* %b.vp, align 2
  %gt = icmp ugt <16 x i8> %a.vec, %b.vec
  %max = select <16 x i1> %gt, <16 x i8> %a.vec, <16 x i8> %b.vec
  store <16 x i8> %max, <16 x i8>* %a.vp, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test7:
; SSE2: pmaxub

; AVX1-LABEL: test7:
; AVX1: vpmaxub

; AVX2-LABEL: test7:
; AVX2: vpmaxub

; AVX512VL-LABEL: test7:
; AVX512VL: vpmaxub
}
252
; Unsigned maximum over <16 x i8>, written as select(icmp uge a, b), a, b.
; Every pass loads 16 bytes from %a and from %b and overwrites that slice of
; %a with the selected lanes; the counter steps by 16 from 0 up to 16384.
define void @test8(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i8* %a, i64 %iv
  %a.vp = bitcast i8* %a.elt to <16 x i8>*
  %b.elt = getelementptr inbounds i8* %b, i64 %iv
  %b.vp = bitcast i8* %b.elt to <16 x i8>*
  %a.vec = load <16 x i8>* %a.vp, align 2
  %b.vec = load <16 x i8>* %b.vp, align 2
  %ge = icmp uge <16 x i8> %a.vec, %b.vec
  %max = select <16 x i1> %ge, <16 x i8> %a.vec, <16 x i8> %b.vec
  store <16 x i8> %max, <16 x i8>* %a.vp, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test8:
; SSE2: pmaxub

; AVX1-LABEL: test8:
; AVX1: vpmaxub

; AVX2-LABEL: test8:
; AVX2: vpmaxub

; AVX512VL-LABEL: test8:
; AVX512VL: vpmaxub
}
287
; Signed minimum over <8 x i16>, written as select(icmp slt a, b), a, b.
; Every pass loads 8 i16 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 8 from 0 up to
; 16384.
define void @test9(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %a.vp = bitcast i16* %a.elt to <8 x i16>*
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %b.vp = bitcast i16* %b.elt to <8 x i16>*
  %a.vec = load <8 x i16>* %a.vp, align 2
  %b.vec = load <8 x i16>* %b.vp, align 2
  %lt = icmp slt <8 x i16> %a.vec, %b.vec
  %min = select <8 x i1> %lt, <8 x i16> %a.vec, <8 x i16> %b.vec
  store <8 x i16> %min, <8 x i16>* %a.vp, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test9:
; SSE2: pminsw

; AVX1-LABEL: test9:
; AVX1: vpminsw

; AVX2-LABEL: test9:
; AVX2: vpminsw

; AVX512VL-LABEL: test9:
; AVX512VL: vpminsw
}
322
; Signed minimum over <8 x i16>, written as select(icmp sle a, b), a, b.
; Every pass loads 8 i16 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 8 from 0 up to
; 16384.
define void @test10(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %a.vp = bitcast i16* %a.elt to <8 x i16>*
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %b.vp = bitcast i16* %b.elt to <8 x i16>*
  %a.vec = load <8 x i16>* %a.vp, align 2
  %b.vec = load <8 x i16>* %b.vp, align 2
  %le = icmp sle <8 x i16> %a.vec, %b.vec
  %min = select <8 x i1> %le, <8 x i16> %a.vec, <8 x i16> %b.vec
  store <8 x i16> %min, <8 x i16>* %a.vp, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test10:
; SSE2: pminsw

; AVX1-LABEL: test10:
; AVX1: vpminsw

; AVX2-LABEL: test10:
; AVX2: vpminsw

; AVX512VL-LABEL: test10:
; AVX512VL: vpminsw
}
357
; Signed maximum over <8 x i16>, written as select(icmp sgt a, b), a, b.
; Every pass loads 8 i16 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 8 from 0 up to
; 16384.
define void @test11(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %a.vp = bitcast i16* %a.elt to <8 x i16>*
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %b.vp = bitcast i16* %b.elt to <8 x i16>*
  %a.vec = load <8 x i16>* %a.vp, align 2
  %b.vec = load <8 x i16>* %b.vp, align 2
  %gt = icmp sgt <8 x i16> %a.vec, %b.vec
  %max = select <8 x i1> %gt, <8 x i16> %a.vec, <8 x i16> %b.vec
  store <8 x i16> %max, <8 x i16>* %a.vp, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test11:
; SSE2: pmaxsw

; AVX1-LABEL: test11:
; AVX1: vpmaxsw

; AVX2-LABEL: test11:
; AVX2: vpmaxsw

; AVX512VL-LABEL: test11:
; AVX512VL: vpmaxsw
}
392
; Signed maximum over <8 x i16>, written as select(icmp sge a, b), a, b.
; Every pass loads 8 i16 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 8 from 0 up to
; 16384.
define void @test12(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %a.vp = bitcast i16* %a.elt to <8 x i16>*
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %b.vp = bitcast i16* %b.elt to <8 x i16>*
  %a.vec = load <8 x i16>* %a.vp, align 2
  %b.vec = load <8 x i16>* %b.vp, align 2
  %ge = icmp sge <8 x i16> %a.vec, %b.vec
  %max = select <8 x i1> %ge, <8 x i16> %a.vec, <8 x i16> %b.vec
  store <8 x i16> %max, <8 x i16>* %a.vp, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test12:
; SSE2: pmaxsw

; AVX1-LABEL: test12:
; AVX1: vpmaxsw

; AVX2-LABEL: test12:
; AVX2: vpmaxsw

; AVX512VL-LABEL: test12:
; AVX512VL: vpmaxsw
}
427
; Unsigned minimum over <8 x i16>, written as select(icmp ult a, b), a, b.
; Every pass loads 8 i16 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 8 from 0 up to
; 16384.
define void @test13(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %a.vp = bitcast i16* %a.elt to <8 x i16>*
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %b.vp = bitcast i16* %b.elt to <8 x i16>*
  %a.vec = load <8 x i16>* %a.vp, align 2
  %b.vec = load <8 x i16>* %b.vp, align 2
  %lt = icmp ult <8 x i16> %a.vec, %b.vec
  %min = select <8 x i1> %lt, <8 x i16> %a.vec, <8 x i16> %b.vec
  store <8 x i16> %min, <8 x i16>* %a.vp, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test13:
; SSE4: pminuw

; AVX1-LABEL: test13:
; AVX1: vpminuw

; AVX2-LABEL: test13:
; AVX2: vpminuw

; AVX512VL-LABEL: test13:
; AVX512VL: vpminuw
}
462
; Unsigned minimum over <8 x i16>, written as select(icmp ule a, b), a, b.
; Every pass loads 8 i16 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 8 from 0 up to
; 16384.
define void @test14(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %a.vp = bitcast i16* %a.elt to <8 x i16>*
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %b.vp = bitcast i16* %b.elt to <8 x i16>*
  %a.vec = load <8 x i16>* %a.vp, align 2
  %b.vec = load <8 x i16>* %b.vp, align 2
  %le = icmp ule <8 x i16> %a.vec, %b.vec
  %min = select <8 x i1> %le, <8 x i16> %a.vec, <8 x i16> %b.vec
  store <8 x i16> %min, <8 x i16>* %a.vp, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test14:
; SSE4: pminuw

; AVX1-LABEL: test14:
; AVX1: vpminuw

; AVX2-LABEL: test14:
; AVX2: vpminuw

; AVX512VL-LABEL: test14:
; AVX512VL: vpminuw
}
497
; Unsigned maximum over <8 x i16>, written as select(icmp ugt a, b), a, b.
; Every pass loads 8 i16 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 8 from 0 up to
; 16384.
define void @test15(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %a.vp = bitcast i16* %a.elt to <8 x i16>*
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %b.vp = bitcast i16* %b.elt to <8 x i16>*
  %a.vec = load <8 x i16>* %a.vp, align 2
  %b.vec = load <8 x i16>* %b.vp, align 2
  %gt = icmp ugt <8 x i16> %a.vec, %b.vec
  %max = select <8 x i1> %gt, <8 x i16> %a.vec, <8 x i16> %b.vec
  store <8 x i16> %max, <8 x i16>* %a.vp, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test15:
; SSE4: pmaxuw

; AVX1-LABEL: test15:
; AVX1: vpmaxuw

; AVX2-LABEL: test15:
; AVX2: vpmaxuw

; AVX512VL-LABEL: test15:
; AVX512VL: vpmaxuw
}
532
; Unsigned maximum over <8 x i16>, written as select(icmp uge a, b), a, b.
; Every pass loads 8 i16 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 8 from 0 up to
; 16384.
define void @test16(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %a.vp = bitcast i16* %a.elt to <8 x i16>*
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %b.vp = bitcast i16* %b.elt to <8 x i16>*
  %a.vec = load <8 x i16>* %a.vp, align 2
  %b.vec = load <8 x i16>* %b.vp, align 2
  %ge = icmp uge <8 x i16> %a.vec, %b.vec
  %max = select <8 x i1> %ge, <8 x i16> %a.vec, <8 x i16> %b.vec
  store <8 x i16> %max, <8 x i16>* %a.vp, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test16:
; SSE4: pmaxuw

; AVX1-LABEL: test16:
; AVX1: vpmaxuw

; AVX2-LABEL: test16:
; AVX2: vpmaxuw

; AVX512VL-LABEL: test16:
; AVX512VL: vpmaxuw
}
567
; Signed minimum over <4 x i32>, written as select(icmp slt a, b), a, b.
; Every pass loads 4 i32 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 4 from 0 up to
; 16384.
define void @test17(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %a.vp = bitcast i32* %a.elt to <4 x i32>*
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %b.vp = bitcast i32* %b.elt to <4 x i32>*
  %a.vec = load <4 x i32>* %a.vp, align 2
  %b.vec = load <4 x i32>* %b.vp, align 2
  %lt = icmp slt <4 x i32> %a.vec, %b.vec
  %min = select <4 x i1> %lt, <4 x i32> %a.vec, <4 x i32> %b.vec
  store <4 x i32> %min, <4 x i32>* %a.vp, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test17:
; SSE4: pminsd

; AVX1-LABEL: test17:
; AVX1: vpminsd

; AVX2-LABEL: test17:
; AVX2: vpminsd

; AVX512VL-LABEL: test17:
; AVX512VL: vpminsd
}
602
; Signed minimum over <4 x i32>, written as select(icmp sle a, b), a, b.
; Every pass loads 4 i32 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 4 from 0 up to
; 16384.
define void @test18(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %a.vp = bitcast i32* %a.elt to <4 x i32>*
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %b.vp = bitcast i32* %b.elt to <4 x i32>*
  %a.vec = load <4 x i32>* %a.vp, align 2
  %b.vec = load <4 x i32>* %b.vp, align 2
  %le = icmp sle <4 x i32> %a.vec, %b.vec
  %min = select <4 x i1> %le, <4 x i32> %a.vec, <4 x i32> %b.vec
  store <4 x i32> %min, <4 x i32>* %a.vp, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test18:
; SSE4: pminsd

; AVX1-LABEL: test18:
; AVX1: vpminsd

; AVX2-LABEL: test18:
; AVX2: vpminsd

; AVX512VL-LABEL: test18:
; AVX512VL: vpminsd
}
637
; Signed maximum over <4 x i32>, written as select(icmp sgt a, b), a, b.
; Every pass loads 4 i32 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 4 from 0 up to
; 16384.
define void @test19(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %a.vp = bitcast i32* %a.elt to <4 x i32>*
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %b.vp = bitcast i32* %b.elt to <4 x i32>*
  %a.vec = load <4 x i32>* %a.vp, align 2
  %b.vec = load <4 x i32>* %b.vp, align 2
  %gt = icmp sgt <4 x i32> %a.vec, %b.vec
  %max = select <4 x i1> %gt, <4 x i32> %a.vec, <4 x i32> %b.vec
  store <4 x i32> %max, <4 x i32>* %a.vp, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test19:
; SSE4: pmaxsd

; AVX1-LABEL: test19:
; AVX1: vpmaxsd

; AVX2-LABEL: test19:
; AVX2: vpmaxsd

; AVX512VL-LABEL: test19:
; AVX512VL: vpmaxsd
}
672
; Signed maximum over <4 x i32>, written as select(icmp sge a, b), a, b.
; Every pass loads 4 i32 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 4 from 0 up to
; 16384.
define void @test20(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %a.vp = bitcast i32* %a.elt to <4 x i32>*
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %b.vp = bitcast i32* %b.elt to <4 x i32>*
  %a.vec = load <4 x i32>* %a.vp, align 2
  %b.vec = load <4 x i32>* %b.vp, align 2
  %ge = icmp sge <4 x i32> %a.vec, %b.vec
  %max = select <4 x i1> %ge, <4 x i32> %a.vec, <4 x i32> %b.vec
  store <4 x i32> %max, <4 x i32>* %a.vp, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test20:
; SSE4: pmaxsd

; AVX1-LABEL: test20:
; AVX1: vpmaxsd

; AVX2-LABEL: test20:
; AVX2: vpmaxsd

; AVX512VL-LABEL: test20:
; AVX512VL: vpmaxsd
}
707
; Unsigned minimum over <4 x i32>, written as select(icmp ult a, b), a, b.
; Every pass loads 4 i32 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 4 from 0 up to
; 16384.
define void @test21(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %a.vp = bitcast i32* %a.elt to <4 x i32>*
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %b.vp = bitcast i32* %b.elt to <4 x i32>*
  %a.vec = load <4 x i32>* %a.vp, align 2
  %b.vec = load <4 x i32>* %b.vp, align 2
  %lt = icmp ult <4 x i32> %a.vec, %b.vec
  %min = select <4 x i1> %lt, <4 x i32> %a.vec, <4 x i32> %b.vec
  store <4 x i32> %min, <4 x i32>* %a.vp, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test21:
; SSE4: pminud

; AVX1-LABEL: test21:
; AVX1: vpminud

; AVX2-LABEL: test21:
; AVX2: vpminud

; AVX512VL-LABEL: test21:
; AVX512VL: vpminud
}
742
; Unsigned minimum over <4 x i32>, written as select(icmp ule a, b), a, b.
; Every pass loads 4 i32 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 4 from 0 up to
; 16384.
define void @test22(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %a.vp = bitcast i32* %a.elt to <4 x i32>*
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %b.vp = bitcast i32* %b.elt to <4 x i32>*
  %a.vec = load <4 x i32>* %a.vp, align 2
  %b.vec = load <4 x i32>* %b.vp, align 2
  %le = icmp ule <4 x i32> %a.vec, %b.vec
  %min = select <4 x i1> %le, <4 x i32> %a.vec, <4 x i32> %b.vec
  store <4 x i32> %min, <4 x i32>* %a.vp, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test22:
; SSE4: pminud

; AVX1-LABEL: test22:
; AVX1: vpminud

; AVX2-LABEL: test22:
; AVX2: vpminud

; AVX512VL-LABEL: test22:
; AVX512VL: vpminud
}
777
; Unsigned maximum over <4 x i32>, written as select(icmp ugt a, b), a, b.
; Every pass loads 4 i32 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 4 from 0 up to
; 16384.
define void @test23(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %a.vp = bitcast i32* %a.elt to <4 x i32>*
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %b.vp = bitcast i32* %b.elt to <4 x i32>*
  %a.vec = load <4 x i32>* %a.vp, align 2
  %b.vec = load <4 x i32>* %b.vp, align 2
  %gt = icmp ugt <4 x i32> %a.vec, %b.vec
  %max = select <4 x i1> %gt, <4 x i32> %a.vec, <4 x i32> %b.vec
  store <4 x i32> %max, <4 x i32>* %a.vp, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test23:
; SSE4: pmaxud

; AVX1-LABEL: test23:
; AVX1: vpmaxud

; AVX2-LABEL: test23:
; AVX2: vpmaxud

; AVX512VL-LABEL: test23:
; AVX512VL: vpmaxud
}
812
; Unsigned maximum over <4 x i32>, written as select(icmp uge a, b), a, b.
; Every pass loads 4 i32 elements from %a and from %b and overwrites that
; slice of %a with the selected lanes; the counter steps by 4 from 0 up to
; 16384.
define void @test24(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %a.vp = bitcast i32* %a.elt to <4 x i32>*
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %b.vp = bitcast i32* %b.elt to <4 x i32>*
  %a.vec = load <4 x i32>* %a.vp, align 2
  %b.vec = load <4 x i32>* %b.vp, align 2
  %ge = icmp uge <4 x i32> %a.vec, %b.vec
  %max = select <4 x i1> %ge, <4 x i32> %a.vec, <4 x i32> %b.vec
  store <4 x i32> %max, <4 x i32>* %a.vp, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test24:
; SSE4: pmaxud

; AVX1-LABEL: test24:
; AVX1: vpmaxud

; AVX2-LABEL: test24:
; AVX2: vpmaxud

; AVX512VL-LABEL: test24:
; AVX512VL: vpmaxud
}
847
; Signed minimum over <32 x i8>, written as select(icmp slt a, b), a, b.
; Every pass loads 32 bytes from %a and from %b and overwrites that slice of
; %a with the selected lanes; the counter steps by 32 from 0 up to 16384.
define void @test25(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i8* %a, i64 %iv
  %a.vp = bitcast i8* %a.elt to <32 x i8>*
  %b.elt = getelementptr inbounds i8* %b, i64 %iv
  %b.vp = bitcast i8* %b.elt to <32 x i8>*
  %a.vec = load <32 x i8>* %a.vp, align 2
  %b.vec = load <32 x i8>* %b.vp, align 2
  %lt = icmp slt <32 x i8> %a.vec, %b.vec
  %min = select <32 x i1> %lt, <32 x i8> %a.vec, <32 x i8> %b.vec
  store <32 x i8> %min, <32 x i8>* %a.vp, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test25:
; AVX2: vpminsb

; AVX512VL-LABEL: test25:
; AVX512VL: vpminsb
}
876
; Signed minimum over <32 x i8>, written as select(icmp sle a, b), a, b.
; Every pass loads 32 bytes from %a and from %b and overwrites that slice of
; %a with the selected lanes; the counter steps by 32 from 0 up to 16384.
define void @test26(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i8* %a, i64 %iv
  %a.vp = bitcast i8* %a.elt to <32 x i8>*
  %b.elt = getelementptr inbounds i8* %b, i64 %iv
  %b.vp = bitcast i8* %b.elt to <32 x i8>*
  %a.vec = load <32 x i8>* %a.vp, align 2
  %b.vec = load <32 x i8>* %b.vp, align 2
  %le = icmp sle <32 x i8> %a.vec, %b.vec
  %min = select <32 x i1> %le, <32 x i8> %a.vec, <32 x i8> %b.vec
  store <32 x i8> %min, <32 x i8>* %a.vp, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test26:
; AVX2: vpminsb

; AVX512VL-LABEL: test26:
; AVX512VL: vpminsb
}
905
; Signed maximum over <32 x i8>, written as select(icmp sgt a, b), a, b.
; Every pass loads 32 bytes from %a and from %b and overwrites that slice of
; %a with the selected lanes; the counter steps by 32 from 0 up to 16384.
define void @test27(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %a.elt = getelementptr inbounds i8* %a, i64 %iv
  %a.vp = bitcast i8* %a.elt to <32 x i8>*
  %b.elt = getelementptr inbounds i8* %b, i64 %iv
  %b.vp = bitcast i8* %b.elt to <32 x i8>*
  %a.vec = load <32 x i8>* %a.vp, align 2
  %b.vec = load <32 x i8>* %b.vp, align 2
  %gt = icmp sgt <32 x i8> %a.vec, %b.vec
  %max = select <32 x i1> %gt, <32 x i8> %a.vec, <32 x i8> %b.vec
  store <32 x i8> %max, <32 x i8>* %a.vp, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test27:
; AVX2: vpmaxsb

; AVX512VL-LABEL: test27:
; AVX512VL: vpmaxsb
}
934
; test28 - signed maximum over <32 x i8>, written as icmp sge + select.
; The loop covers 16384 elements in steps of 32 and stores each result back
; through %a. Checked for vpmaxsb under the AVX2 and AVX512VL prefixes.
define void @test28(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <32 x i8>*
  %vec.b = bitcast i8* %elt.b to <32 x i8>*
  %lhs = load <32 x i8>* %vec.a, align 2
  %rhs = load <32 x i8>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp sge <32 x i8> %lhs, %rhs
  %max = select <32 x i1> %take.lhs, <32 x i8> %lhs, <32 x i8> %rhs
  store <32 x i8> %max, <32 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test28:
; AVX2: vpmaxsb

; AVX512VL-LABEL: test28:
; AVX512VL: vpmaxsb
}
963
; test29 - unsigned minimum over <32 x i8>, written as icmp ult + select.
; The loop covers 16384 elements in steps of 32 and stores each result back
; through %a. Checked for vpminub under the AVX2 and AVX512VL prefixes.
define void @test29(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <32 x i8>*
  %vec.b = bitcast i8* %elt.b to <32 x i8>*
  %lhs = load <32 x i8>* %vec.a, align 2
  %rhs = load <32 x i8>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp ult <32 x i8> %lhs, %rhs
  %min = select <32 x i1> %take.lhs, <32 x i8> %lhs, <32 x i8> %rhs
  store <32 x i8> %min, <32 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test29:
; AVX2: vpminub

; AVX512VL-LABEL: test29:
; AVX512VL: vpminub
}
992
; test30 - unsigned minimum over <32 x i8>, written as icmp ule + select.
; The loop covers 16384 elements in steps of 32 and stores each result back
; through %a. Checked for vpminub under the AVX2 and AVX512VL prefixes.
define void @test30(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <32 x i8>*
  %vec.b = bitcast i8* %elt.b to <32 x i8>*
  %lhs = load <32 x i8>* %vec.a, align 2
  %rhs = load <32 x i8>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp ule <32 x i8> %lhs, %rhs
  %min = select <32 x i1> %take.lhs, <32 x i8> %lhs, <32 x i8> %rhs
  store <32 x i8> %min, <32 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test30:
; AVX2: vpminub

; AVX512VL-LABEL: test30:
; AVX512VL: vpminub
}
1021
; test31 - unsigned maximum over <32 x i8>, written as icmp ugt + select.
; The loop covers 16384 elements in steps of 32 and stores each result back
; through %a. Checked for vpmaxub under the AVX2 and AVX512VL prefixes.
define void @test31(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <32 x i8>*
  %vec.b = bitcast i8* %elt.b to <32 x i8>*
  %lhs = load <32 x i8>* %vec.a, align 2
  %rhs = load <32 x i8>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp ugt <32 x i8> %lhs, %rhs
  %max = select <32 x i1> %take.lhs, <32 x i8> %lhs, <32 x i8> %rhs
  store <32 x i8> %max, <32 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test31:
; AVX2: vpmaxub

; AVX512VL-LABEL: test31:
; AVX512VL: vpmaxub
}
1050
; test32 - unsigned maximum over <32 x i8>, written as icmp uge + select.
; The loop covers 16384 elements in steps of 32 and stores each result back
; through %a. Checked for vpmaxub under the AVX2 and AVX512VL prefixes.
define void @test32(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <32 x i8>*
  %vec.b = bitcast i8* %elt.b to <32 x i8>*
  %lhs = load <32 x i8>* %vec.a, align 2
  %rhs = load <32 x i8>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp uge <32 x i8> %lhs, %rhs
  %max = select <32 x i1> %take.lhs, <32 x i8> %lhs, <32 x i8> %rhs
  store <32 x i8> %max, <32 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test32:
; AVX2: vpmaxub

; AVX512VL-LABEL: test32:
; AVX512VL: vpmaxub
}
1079
; test33 - signed minimum over <16 x i16>, written as icmp slt + select.
; The loop covers 16384 elements in steps of 16 and stores each result back
; through %a. Checked for vpminsw under the AVX2 and AVX512VL prefixes.
define void @test33(i16* nocapture %a, i16* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %iv
  %elt.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.a = bitcast i16* %elt.a to <16 x i16>*
  %vec.b = bitcast i16* %elt.b to <16 x i16>*
  %lhs = load <16 x i16>* %vec.a, align 2
  %rhs = load <16 x i16>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp slt <16 x i16> %lhs, %rhs
  %min = select <16 x i1> %take.lhs, <16 x i16> %lhs, <16 x i16> %rhs
  store <16 x i16> %min, <16 x i16>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test33:
; AVX2: vpminsw

; AVX512VL-LABEL: test33:
; AVX512VL: vpminsw
}
1108
; test34 - signed minimum over <16 x i16>, written as icmp sle + select.
; The loop covers 16384 elements in steps of 16 and stores each result back
; through %a. Checked for vpminsw under the AVX2 and AVX512VL prefixes.
define void @test34(i16* nocapture %a, i16* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %iv
  %elt.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.a = bitcast i16* %elt.a to <16 x i16>*
  %vec.b = bitcast i16* %elt.b to <16 x i16>*
  %lhs = load <16 x i16>* %vec.a, align 2
  %rhs = load <16 x i16>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp sle <16 x i16> %lhs, %rhs
  %min = select <16 x i1> %take.lhs, <16 x i16> %lhs, <16 x i16> %rhs
  store <16 x i16> %min, <16 x i16>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test34:
; AVX2: vpminsw

; AVX512VL-LABEL: test34:
; AVX512VL: vpminsw
}
1137
; test35 - signed maximum over <16 x i16>, written as icmp sgt + select.
; The loop covers 16384 elements in steps of 16 and stores each result back
; through %a. Checked for vpmaxsw under the AVX2 and AVX512VL prefixes.
define void @test35(i16* nocapture %a, i16* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %iv
  %elt.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.a = bitcast i16* %elt.a to <16 x i16>*
  %vec.b = bitcast i16* %elt.b to <16 x i16>*
  %lhs = load <16 x i16>* %vec.a, align 2
  %rhs = load <16 x i16>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp sgt <16 x i16> %lhs, %rhs
  %max = select <16 x i1> %take.lhs, <16 x i16> %lhs, <16 x i16> %rhs
  store <16 x i16> %max, <16 x i16>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test35:
; AVX2: vpmaxsw

; AVX512VL-LABEL: test35:
; AVX512VL: vpmaxsw
}
1166
; test36 - signed maximum over <16 x i16>, written as icmp sge + select.
; The loop covers 16384 elements in steps of 16 and stores each result back
; through %a. Checked for vpmaxsw under the AVX2 and AVX512VL prefixes.
define void @test36(i16* nocapture %a, i16* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %iv
  %elt.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.a = bitcast i16* %elt.a to <16 x i16>*
  %vec.b = bitcast i16* %elt.b to <16 x i16>*
  %lhs = load <16 x i16>* %vec.a, align 2
  %rhs = load <16 x i16>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp sge <16 x i16> %lhs, %rhs
  %max = select <16 x i1> %take.lhs, <16 x i16> %lhs, <16 x i16> %rhs
  store <16 x i16> %max, <16 x i16>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test36:
; AVX2: vpmaxsw

; AVX512VL-LABEL: test36:
; AVX512VL: vpmaxsw
}
1195
; test37 - unsigned minimum over <16 x i16>, written as icmp ult + select.
; The loop covers 16384 elements in steps of 16 and stores each result back
; through %a. Checked for vpminuw under the AVX2 and AVX512VL prefixes.
define void @test37(i16* nocapture %a, i16* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %iv
  %elt.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.a = bitcast i16* %elt.a to <16 x i16>*
  %vec.b = bitcast i16* %elt.b to <16 x i16>*
  %lhs = load <16 x i16>* %vec.a, align 2
  %rhs = load <16 x i16>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp ult <16 x i16> %lhs, %rhs
  %min = select <16 x i1> %take.lhs, <16 x i16> %lhs, <16 x i16> %rhs
  store <16 x i16> %min, <16 x i16>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test37:
; AVX2: vpminuw

; AVX512VL-LABEL: test37:
; AVX512VL: vpminuw
}
1224
; test38 - unsigned minimum over <16 x i16>, written as icmp ule + select.
; The loop covers 16384 elements in steps of 16 and stores each result back
; through %a. Checked for vpminuw under the AVX2 and AVX512VL prefixes.
define void @test38(i16* nocapture %a, i16* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %iv
  %elt.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.a = bitcast i16* %elt.a to <16 x i16>*
  %vec.b = bitcast i16* %elt.b to <16 x i16>*
  %lhs = load <16 x i16>* %vec.a, align 2
  %rhs = load <16 x i16>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp ule <16 x i16> %lhs, %rhs
  %min = select <16 x i1> %take.lhs, <16 x i16> %lhs, <16 x i16> %rhs
  store <16 x i16> %min, <16 x i16>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test38:
; AVX2: vpminuw

; AVX512VL-LABEL: test38:
; AVX512VL: vpminuw
}
1253
; test39 - unsigned maximum over <16 x i16>, written as icmp ugt + select.
; The loop covers 16384 elements in steps of 16 and stores each result back
; through %a. Checked for vpmaxuw under the AVX2 and AVX512VL prefixes.
define void @test39(i16* nocapture %a, i16* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %iv
  %elt.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.a = bitcast i16* %elt.a to <16 x i16>*
  %vec.b = bitcast i16* %elt.b to <16 x i16>*
  %lhs = load <16 x i16>* %vec.a, align 2
  %rhs = load <16 x i16>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp ugt <16 x i16> %lhs, %rhs
  %max = select <16 x i1> %take.lhs, <16 x i16> %lhs, <16 x i16> %rhs
  store <16 x i16> %max, <16 x i16>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test39:
; AVX2: vpmaxuw

; AVX512VL-LABEL: test39:
; AVX512VL: vpmaxuw
}
1282
; test40 - unsigned maximum over <16 x i16>, written as icmp uge + select.
; The loop covers 16384 elements in steps of 16 and stores each result back
; through %a. Checked for vpmaxuw under the AVX2 and AVX512VL prefixes.
define void @test40(i16* nocapture %a, i16* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %iv
  %elt.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.a = bitcast i16* %elt.a to <16 x i16>*
  %vec.b = bitcast i16* %elt.b to <16 x i16>*
  %lhs = load <16 x i16>* %vec.a, align 2
  %rhs = load <16 x i16>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp uge <16 x i16> %lhs, %rhs
  %max = select <16 x i1> %take.lhs, <16 x i16> %lhs, <16 x i16> %rhs
  store <16 x i16> %max, <16 x i16>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test40:
; AVX2: vpmaxuw

; AVX512VL-LABEL: test40:
; AVX512VL: vpmaxuw
}
1311
; test41 - signed minimum over <8 x i32>, written as icmp slt + select.
; The loop covers 16384 elements in steps of 8 and stores each result back
; through %a. Checked for vpminsd under the AVX2 and AVX512VL prefixes.
define void @test41(i32* nocapture %a, i32* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %iv
  %elt.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.a = bitcast i32* %elt.a to <8 x i32>*
  %vec.b = bitcast i32* %elt.b to <8 x i32>*
  %lhs = load <8 x i32>* %vec.a, align 2
  %rhs = load <8 x i32>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp slt <8 x i32> %lhs, %rhs
  %min = select <8 x i1> %take.lhs, <8 x i32> %lhs, <8 x i32> %rhs
  store <8 x i32> %min, <8 x i32>* %vec.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test41:
; AVX2: vpminsd

; AVX512VL-LABEL: test41:
; AVX512VL: vpminsd
}
1340
; test42 - signed minimum over <8 x i32>, written as icmp sle + select.
; The loop covers 16384 elements in steps of 8 and stores each result back
; through %a. Checked for vpminsd under the AVX2 and AVX512VL prefixes.
define void @test42(i32* nocapture %a, i32* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %iv
  %elt.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.a = bitcast i32* %elt.a to <8 x i32>*
  %vec.b = bitcast i32* %elt.b to <8 x i32>*
  %lhs = load <8 x i32>* %vec.a, align 2
  %rhs = load <8 x i32>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp sle <8 x i32> %lhs, %rhs
  %min = select <8 x i1> %take.lhs, <8 x i32> %lhs, <8 x i32> %rhs
  store <8 x i32> %min, <8 x i32>* %vec.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test42:
; AVX2: vpminsd

; AVX512VL-LABEL: test42:
; AVX512VL: vpminsd
}
1369
; test43 - signed maximum over <8 x i32>, written as icmp sgt + select.
; The loop covers 16384 elements in steps of 8 and stores each result back
; through %a. Checked for vpmaxsd under the AVX2 and AVX512VL prefixes.
define void @test43(i32* nocapture %a, i32* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %iv
  %elt.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.a = bitcast i32* %elt.a to <8 x i32>*
  %vec.b = bitcast i32* %elt.b to <8 x i32>*
  %lhs = load <8 x i32>* %vec.a, align 2
  %rhs = load <8 x i32>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp sgt <8 x i32> %lhs, %rhs
  %max = select <8 x i1> %take.lhs, <8 x i32> %lhs, <8 x i32> %rhs
  store <8 x i32> %max, <8 x i32>* %vec.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test43:
; AVX2: vpmaxsd

; AVX512VL-LABEL: test43:
; AVX512VL: vpmaxsd
}
1398
; test44 - signed maximum over <8 x i32>, written as icmp sge + select.
; The loop covers 16384 elements in steps of 8 and stores each result back
; through %a. Checked for vpmaxsd under the AVX2 and AVX512VL prefixes.
define void @test44(i32* nocapture %a, i32* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %iv
  %elt.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.a = bitcast i32* %elt.a to <8 x i32>*
  %vec.b = bitcast i32* %elt.b to <8 x i32>*
  %lhs = load <8 x i32>* %vec.a, align 2
  %rhs = load <8 x i32>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp sge <8 x i32> %lhs, %rhs
  %max = select <8 x i1> %take.lhs, <8 x i32> %lhs, <8 x i32> %rhs
  store <8 x i32> %max, <8 x i32>* %vec.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test44:
; AVX2: vpmaxsd

; AVX512VL-LABEL: test44:
; AVX512VL: vpmaxsd
}
1427
; test45 - unsigned minimum over <8 x i32>, written as icmp ult + select.
; The loop covers 16384 elements in steps of 8 and stores each result back
; through %a. Checked for vpminud under the AVX2 and AVX512VL prefixes.
define void @test45(i32* nocapture %a, i32* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %iv
  %elt.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.a = bitcast i32* %elt.a to <8 x i32>*
  %vec.b = bitcast i32* %elt.b to <8 x i32>*
  %lhs = load <8 x i32>* %vec.a, align 2
  %rhs = load <8 x i32>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp ult <8 x i32> %lhs, %rhs
  %min = select <8 x i1> %take.lhs, <8 x i32> %lhs, <8 x i32> %rhs
  store <8 x i32> %min, <8 x i32>* %vec.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test45:
; AVX2: vpminud

; AVX512VL-LABEL: test45:
; AVX512VL: vpminud
}
1456
; test46 - unsigned minimum over <8 x i32>, written as icmp ule + select.
; The loop covers 16384 elements in steps of 8 and stores each result back
; through %a. Checked for vpminud under the AVX2 and AVX512VL prefixes.
define void @test46(i32* nocapture %a, i32* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %iv
  %elt.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.a = bitcast i32* %elt.a to <8 x i32>*
  %vec.b = bitcast i32* %elt.b to <8 x i32>*
  %lhs = load <8 x i32>* %vec.a, align 2
  %rhs = load <8 x i32>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp ule <8 x i32> %lhs, %rhs
  %min = select <8 x i1> %take.lhs, <8 x i32> %lhs, <8 x i32> %rhs
  store <8 x i32> %min, <8 x i32>* %vec.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test46:
; AVX2: vpminud

; AVX512VL-LABEL: test46:
; AVX512VL: vpminud
}
1485
; test47 - unsigned maximum over <8 x i32>, written as icmp ugt + select.
; The loop covers 16384 elements in steps of 8 and stores each result back
; through %a. Checked for vpmaxud under the AVX2 and AVX512VL prefixes.
define void @test47(i32* nocapture %a, i32* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %iv
  %elt.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.a = bitcast i32* %elt.a to <8 x i32>*
  %vec.b = bitcast i32* %elt.b to <8 x i32>*
  %lhs = load <8 x i32>* %vec.a, align 2
  %rhs = load <8 x i32>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp ugt <8 x i32> %lhs, %rhs
  %max = select <8 x i1> %take.lhs, <8 x i32> %lhs, <8 x i32> %rhs
  store <8 x i32> %max, <8 x i32>* %vec.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test47:
; AVX2: vpmaxud

; AVX512VL-LABEL: test47:
; AVX512VL: vpmaxud
}
1514
; test48 - unsigned maximum over <8 x i32>, written as icmp uge + select.
; The loop covers 16384 elements in steps of 8 and stores each result back
; through %a. Checked for vpmaxud under the AVX2 and AVX512VL prefixes.
define void @test48(i32* nocapture %a, i32* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %iv
  %elt.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.a = bitcast i32* %elt.a to <8 x i32>*
  %vec.b = bitcast i32* %elt.b to <8 x i32>*
  %lhs = load <8 x i32>* %vec.a, align 2
  %rhs = load <8 x i32>* %vec.b, align 2
  ; true lanes keep the %a operand
  %take.lhs = icmp uge <8 x i32> %lhs, %rhs
  %max = select <8 x i1> %take.lhs, <8 x i32> %lhs, <8 x i32> %rhs
  store <8 x i32> %max, <8 x i32>* %vec.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test48:
; AVX2: vpmaxud

; AVX512VL-LABEL: test48:
; AVX512VL: vpmaxud
}
1543
; test49 - signed maximum over <16 x i8>, written as icmp slt with the select
; operands swapped (true lanes take the %b operand). The loop covers 16384
; elements in steps of 16 and stores each result back through %a.
; Checked for pmaxsb / vpmaxsb under the SSE4, AVX1, AVX2 and AVX512VL prefixes.
define void @test49(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <16 x i8>*
  %vec.b = bitcast i8* %elt.b to <16 x i8>*
  %lhs = load <16 x i8>* %vec.a, align 2
  %rhs = load <16 x i8>* %vec.b, align 2
  ; inverted form: true lanes take the %b operand
  %take.rhs = icmp slt <16 x i8> %lhs, %rhs
  %max = select <16 x i1> %take.rhs, <16 x i8> %rhs, <16 x i8> %lhs
  store <16 x i8> %max, <16 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test49:
; SSE4: pmaxsb

; AVX1-LABEL: test49:
; AVX1: vpmaxsb

; AVX2-LABEL: test49:
; AVX2: vpmaxsb

; AVX512VL-LABEL: test49:
; AVX512VL: vpmaxsb
}
1578
; test50 - signed maximum over <16 x i8>, written as icmp sle with the select
; operands swapped (true lanes take the %b operand). The loop covers 16384
; elements in steps of 16 and stores each result back through %a.
; Checked for pmaxsb / vpmaxsb under the SSE4, AVX1, AVX2 and AVX512VL prefixes.
define void @test50(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <16 x i8>*
  %vec.b = bitcast i8* %elt.b to <16 x i8>*
  %lhs = load <16 x i8>* %vec.a, align 2
  %rhs = load <16 x i8>* %vec.b, align 2
  ; inverted form: true lanes take the %b operand
  %take.rhs = icmp sle <16 x i8> %lhs, %rhs
  %max = select <16 x i1> %take.rhs, <16 x i8> %rhs, <16 x i8> %lhs
  store <16 x i8> %max, <16 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test50:
; SSE4: pmaxsb

; AVX1-LABEL: test50:
; AVX1: vpmaxsb

; AVX2-LABEL: test50:
; AVX2: vpmaxsb

; AVX512VL-LABEL: test50:
; AVX512VL: vpmaxsb
}
1613
; test51 - signed minimum over <16 x i8>, written as icmp sgt with the select
; operands swapped (true lanes take the %b operand). The loop covers 16384
; elements in steps of 16 and stores each result back through %a.
; Checked for pminsb / vpminsb under the SSE4, AVX1, AVX2 and AVX512VL prefixes.
define void @test51(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <16 x i8>*
  %vec.b = bitcast i8* %elt.b to <16 x i8>*
  %lhs = load <16 x i8>* %vec.a, align 2
  %rhs = load <16 x i8>* %vec.b, align 2
  ; inverted form: true lanes take the %b operand
  %take.rhs = icmp sgt <16 x i8> %lhs, %rhs
  %min = select <16 x i1> %take.rhs, <16 x i8> %rhs, <16 x i8> %lhs
  store <16 x i8> %min, <16 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test51:
; SSE4: pminsb

; AVX1-LABEL: test51:
; AVX1: vpminsb

; AVX2-LABEL: test51:
; AVX2: vpminsb

; AVX512VL-LABEL: test51:
; AVX512VL: vpminsb
}
1648
; test52 - signed minimum over <16 x i8>, written as icmp sge with the select
; operands swapped (true lanes take the %b operand). The loop covers 16384
; elements in steps of 16 and stores each result back through %a.
; Checked for pminsb / vpminsb under the SSE4, AVX1, AVX2 and AVX512VL prefixes.
define void @test52(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <16 x i8>*
  %vec.b = bitcast i8* %elt.b to <16 x i8>*
  %lhs = load <16 x i8>* %vec.a, align 2
  %rhs = load <16 x i8>* %vec.b, align 2
  ; inverted form: true lanes take the %b operand
  %take.rhs = icmp sge <16 x i8> %lhs, %rhs
  %min = select <16 x i1> %take.rhs, <16 x i8> %rhs, <16 x i8> %lhs
  store <16 x i8> %min, <16 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test52:
; SSE4: pminsb

; AVX1-LABEL: test52:
; AVX1: vpminsb

; AVX2-LABEL: test52:
; AVX2: vpminsb

; AVX512VL-LABEL: test52:
; AVX512VL: vpminsb
}
1683
; test53 - unsigned maximum over <16 x i8>, written as icmp ult with the select
; operands swapped (true lanes take the %b operand). The loop covers 16384
; elements in steps of 16 and stores each result back through %a.
; Checked for pmaxub / vpmaxub under the SSE2, AVX1, AVX2 and AVX512VL prefixes.
define void @test53(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <16 x i8>*
  %vec.b = bitcast i8* %elt.b to <16 x i8>*
  %lhs = load <16 x i8>* %vec.a, align 2
  %rhs = load <16 x i8>* %vec.b, align 2
  ; inverted form: true lanes take the %b operand
  %take.rhs = icmp ult <16 x i8> %lhs, %rhs
  %max = select <16 x i1> %take.rhs, <16 x i8> %rhs, <16 x i8> %lhs
  store <16 x i8> %max, <16 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test53:
; SSE2: pmaxub

; AVX1-LABEL: test53:
; AVX1: vpmaxub

; AVX2-LABEL: test53:
; AVX2: vpmaxub

; AVX512VL-LABEL: test53:
; AVX512VL: vpmaxub
}
1718
; test54 - unsigned maximum over <16 x i8>, written as icmp ule with the select
; operands swapped (true lanes take the %b operand). The loop covers 16384
; elements in steps of 16 and stores each result back through %a.
; Checked for pmaxub / vpmaxub under the SSE2, AVX1, AVX2 and AVX512VL prefixes.
define void @test54(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <16 x i8>*
  %vec.b = bitcast i8* %elt.b to <16 x i8>*
  %lhs = load <16 x i8>* %vec.a, align 2
  %rhs = load <16 x i8>* %vec.b, align 2
  ; inverted form: true lanes take the %b operand
  %take.rhs = icmp ule <16 x i8> %lhs, %rhs
  %max = select <16 x i1> %take.rhs, <16 x i8> %rhs, <16 x i8> %lhs
  store <16 x i8> %max, <16 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test54:
; SSE2: pmaxub

; AVX1-LABEL: test54:
; AVX1: vpmaxub

; AVX2-LABEL: test54:
; AVX2: vpmaxub

; AVX512VL-LABEL: test54:
; AVX512VL: vpmaxub
}
1753
; test55 - unsigned minimum over <16 x i8>, written as icmp ugt with the select
; operands swapped (true lanes take the %b operand). The loop covers 16384
; elements in steps of 16 and stores each result back through %a.
; Checked for pminub / vpminub under the SSE2, AVX1, AVX2 and AVX512VL prefixes.
define void @test55(i8* nocapture %a, i8* nocapture %b) nounwind {
preheader:
  br label %loop

loop:                                             ; preds = %loop, %preheader
  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %iv
  %elt.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.a = bitcast i8* %elt.a to <16 x i8>*
  %vec.b = bitcast i8* %elt.b to <16 x i8>*
  %lhs = load <16 x i8>* %vec.a, align 2
  %rhs = load <16 x i8>* %vec.b, align 2
  ; inverted form: true lanes take the %b operand
  %take.rhs = icmp ugt <16 x i8> %lhs, %rhs
  %min = select <16 x i1> %take.rhs, <16 x i8> %rhs, <16 x i8> %lhs
  store <16 x i8> %min, <16 x i8>* %vec.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test55:
; SSE2: pminub

; AVX1-LABEL: test55:
; AVX1: vpminub

; AVX2-LABEL: test55:
; AVX2: vpminub

; AVX512VL-LABEL: test55:
; AVX512VL: vpminub
}
1788
; test56 - per-lane unsigned minimum of two <16 x i8> vectors, written as
; select (icmp uge a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 16 until %index.next equals 16384.
; The check lines below expect pminub (vpminub on the AVX runs).
1789define void @test56(i8* nocapture %a, i8* nocapture %b) nounwind {
1790vector.ph:
1791  br label %vector.body
1792
1793vector.body:                                      ; preds = %vector.body, %vector.ph
1794  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1795  %gep.a = getelementptr inbounds i8* %a, i64 %index
1796  %gep.b = getelementptr inbounds i8* %b, i64 %index
1797  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
1798  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
1799  %load.a = load <16 x i8>* %ptr.a, align 2
1800  %load.b = load <16 x i8>* %ptr.b, align 2
1801  %cmp = icmp uge <16 x i8> %load.a, %load.b
1802  %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a ; %load.b where a >= b (unsigned), else %load.a
1803  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
1804  %index.next = add i64 %index, 16
1805  %loop = icmp eq i64 %index.next, 16384
1806  br i1 %loop, label %for.end, label %vector.body
1807
1808for.end:                                          ; preds = %vector.body
1809  ret void
1810
1811; SSE2-LABEL: test56:
1812; SSE2: pminub
1813
1814; AVX1-LABEL: test56:
1815; AVX1: vpminub
1816
1817; AVX2-LABEL: test56:
1818; AVX2: vpminub
1819
1820; AVX512VL-LABEL: test56:
1821; AVX512VL: vpminub
1822}
1823
; test57 - per-lane signed maximum of two <8 x i16> vectors, written as
; select (icmp slt a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 8 until %index.next equals 16384.
; The check lines below expect pmaxsw (vpmaxsw on the AVX runs).
1824define void @test57(i16* nocapture %a, i16* nocapture %b) nounwind {
1825vector.ph:
1826  br label %vector.body
1827
1828vector.body:                                      ; preds = %vector.body, %vector.ph
1829  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1830  %gep.a = getelementptr inbounds i16* %a, i64 %index
1831  %gep.b = getelementptr inbounds i16* %b, i64 %index
1832  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1833  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1834  %load.a = load <8 x i16>* %ptr.a, align 2
1835  %load.b = load <8 x i16>* %ptr.b, align 2
1836  %cmp = icmp slt <8 x i16> %load.a, %load.b
1837  %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a ; %load.b where a < b (signed), else %load.a
1838  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1839  %index.next = add i64 %index, 8
1840  %loop = icmp eq i64 %index.next, 16384
1841  br i1 %loop, label %for.end, label %vector.body
1842
1843for.end:                                          ; preds = %vector.body
1844  ret void
1845
1846; SSE2-LABEL: test57:
1847; SSE2: pmaxsw
1848
1849; AVX1-LABEL: test57:
1850; AVX1: vpmaxsw
1851
1852; AVX2-LABEL: test57:
1853; AVX2: vpmaxsw
1854
1855; AVX512VL-LABEL: test57:
1856; AVX512VL: vpmaxsw
1857}
1858
; test58 - per-lane signed maximum of two <8 x i16> vectors, written as
; select (icmp sle a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 8 until %index.next equals 16384.
; The check lines below expect pmaxsw (vpmaxsw on the AVX runs).
1859define void @test58(i16* nocapture %a, i16* nocapture %b) nounwind {
1860vector.ph:
1861  br label %vector.body
1862
1863vector.body:                                      ; preds = %vector.body, %vector.ph
1864  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1865  %gep.a = getelementptr inbounds i16* %a, i64 %index
1866  %gep.b = getelementptr inbounds i16* %b, i64 %index
1867  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1868  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1869  %load.a = load <8 x i16>* %ptr.a, align 2
1870  %load.b = load <8 x i16>* %ptr.b, align 2
1871  %cmp = icmp sle <8 x i16> %load.a, %load.b
1872  %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a ; %load.b where a <= b (signed), else %load.a
1873  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1874  %index.next = add i64 %index, 8
1875  %loop = icmp eq i64 %index.next, 16384
1876  br i1 %loop, label %for.end, label %vector.body
1877
1878for.end:                                          ; preds = %vector.body
1879  ret void
1880
1881; SSE2-LABEL: test58:
1882; SSE2: pmaxsw
1883
1884; AVX1-LABEL: test58:
1885; AVX1: vpmaxsw
1886
1887; AVX2-LABEL: test58:
1888; AVX2: vpmaxsw
1889
1890; AVX512VL-LABEL: test58:
1891; AVX512VL: vpmaxsw
1892}
1893
; test59 - per-lane signed minimum of two <8 x i16> vectors, written as
; select (icmp sgt a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 8 until %index.next equals 16384.
; The check lines below expect pminsw (vpminsw on the AVX runs).
1894define void @test59(i16* nocapture %a, i16* nocapture %b) nounwind {
1895vector.ph:
1896  br label %vector.body
1897
1898vector.body:                                      ; preds = %vector.body, %vector.ph
1899  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1900  %gep.a = getelementptr inbounds i16* %a, i64 %index
1901  %gep.b = getelementptr inbounds i16* %b, i64 %index
1902  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1903  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1904  %load.a = load <8 x i16>* %ptr.a, align 2
1905  %load.b = load <8 x i16>* %ptr.b, align 2
1906  %cmp = icmp sgt <8 x i16> %load.a, %load.b
1907  %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a ; %load.b where a > b (signed), else %load.a
1908  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1909  %index.next = add i64 %index, 8
1910  %loop = icmp eq i64 %index.next, 16384
1911  br i1 %loop, label %for.end, label %vector.body
1912
1913for.end:                                          ; preds = %vector.body
1914  ret void
1915
1916; SSE2-LABEL: test59:
1917; SSE2: pminsw
1918
1919; AVX1-LABEL: test59:
1920; AVX1: vpminsw
1921
1922; AVX2-LABEL: test59:
1923; AVX2: vpminsw
1924
1925; AVX512VL-LABEL: test59:
1926; AVX512VL: vpminsw
1927}
1928
; test60 - per-lane signed minimum of two <8 x i16> vectors, written as
; select (icmp sge a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 8 until %index.next equals 16384.
; The check lines below expect pminsw (vpminsw on the AVX runs).
1929define void @test60(i16* nocapture %a, i16* nocapture %b) nounwind {
1930vector.ph:
1931  br label %vector.body
1932
1933vector.body:                                      ; preds = %vector.body, %vector.ph
1934  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1935  %gep.a = getelementptr inbounds i16* %a, i64 %index
1936  %gep.b = getelementptr inbounds i16* %b, i64 %index
1937  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1938  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1939  %load.a = load <8 x i16>* %ptr.a, align 2
1940  %load.b = load <8 x i16>* %ptr.b, align 2
1941  %cmp = icmp sge <8 x i16> %load.a, %load.b
1942  %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a ; %load.b where a >= b (signed), else %load.a
1943  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1944  %index.next = add i64 %index, 8
1945  %loop = icmp eq i64 %index.next, 16384
1946  br i1 %loop, label %for.end, label %vector.body
1947
1948for.end:                                          ; preds = %vector.body
1949  ret void
1950
1951; SSE2-LABEL: test60:
1952; SSE2: pminsw
1953
1954; AVX1-LABEL: test60:
1955; AVX1: vpminsw
1956
1957; AVX2-LABEL: test60:
1958; AVX2: vpminsw
1959
1960; AVX512VL-LABEL: test60:
1961; AVX512VL: vpminsw
1962}
1963
; test61 - per-lane unsigned maximum of two <8 x i16> vectors, written as
; select (icmp ult a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 8 until %index.next equals 16384.
; The check lines below expect pmaxuw (vpmaxuw on the AVX runs).
1964define void @test61(i16* nocapture %a, i16* nocapture %b) nounwind {
1965vector.ph:
1966  br label %vector.body
1967
1968vector.body:                                      ; preds = %vector.body, %vector.ph
1969  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1970  %gep.a = getelementptr inbounds i16* %a, i64 %index
1971  %gep.b = getelementptr inbounds i16* %b, i64 %index
1972  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
1973  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
1974  %load.a = load <8 x i16>* %ptr.a, align 2
1975  %load.b = load <8 x i16>* %ptr.b, align 2
1976  %cmp = icmp ult <8 x i16> %load.a, %load.b
1977  %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a ; %load.b where a < b (unsigned), else %load.a
1978  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
1979  %index.next = add i64 %index, 8
1980  %loop = icmp eq i64 %index.next, 16384
1981  br i1 %loop, label %for.end, label %vector.body
1982
1983for.end:                                          ; preds = %vector.body
1984  ret void
1985
1986; SSE4-LABEL: test61:
1987; SSE4: pmaxuw
1988
1989; AVX1-LABEL: test61:
1990; AVX1: vpmaxuw
1991
1992; AVX2-LABEL: test61:
1993; AVX2: vpmaxuw
1994
1995; AVX512VL-LABEL: test61:
1996; AVX512VL: vpmaxuw
1997}
1998
; test62 - per-lane unsigned maximum of two <8 x i16> vectors, written as
; select (icmp ule a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 8 until %index.next equals 16384.
; The check lines below expect pmaxuw (vpmaxuw on the AVX runs).
1999define void @test62(i16* nocapture %a, i16* nocapture %b) nounwind {
2000vector.ph:
2001  br label %vector.body
2002
2003vector.body:                                      ; preds = %vector.body, %vector.ph
2004  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2005  %gep.a = getelementptr inbounds i16* %a, i64 %index
2006  %gep.b = getelementptr inbounds i16* %b, i64 %index
2007  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
2008  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
2009  %load.a = load <8 x i16>* %ptr.a, align 2
2010  %load.b = load <8 x i16>* %ptr.b, align 2
2011  %cmp = icmp ule <8 x i16> %load.a, %load.b
2012  %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a ; %load.b where a <= b (unsigned), else %load.a
2013  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
2014  %index.next = add i64 %index, 8
2015  %loop = icmp eq i64 %index.next, 16384
2016  br i1 %loop, label %for.end, label %vector.body
2017
2018for.end:                                          ; preds = %vector.body
2019  ret void
2020
2021; SSE4-LABEL: test62:
2022; SSE4: pmaxuw
2023
2024; AVX1-LABEL: test62:
2025; AVX1: vpmaxuw
2026
2027; AVX2-LABEL: test62:
2028; AVX2: vpmaxuw
2029
2030; AVX512VL-LABEL: test62:
2031; AVX512VL: vpmaxuw
2032}
2033
; test63 - per-lane unsigned minimum of two <8 x i16> vectors, written as
; select (icmp ugt a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 8 until %index.next equals 16384.
; The check lines below expect pminuw (vpminuw on the AVX runs).
2034define void @test63(i16* nocapture %a, i16* nocapture %b) nounwind {
2035vector.ph:
2036  br label %vector.body
2037
2038vector.body:                                      ; preds = %vector.body, %vector.ph
2039  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2040  %gep.a = getelementptr inbounds i16* %a, i64 %index
2041  %gep.b = getelementptr inbounds i16* %b, i64 %index
2042  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
2043  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
2044  %load.a = load <8 x i16>* %ptr.a, align 2
2045  %load.b = load <8 x i16>* %ptr.b, align 2
2046  %cmp = icmp ugt <8 x i16> %load.a, %load.b
2047  %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a ; %load.b where a > b (unsigned), else %load.a
2048  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
2049  %index.next = add i64 %index, 8
2050  %loop = icmp eq i64 %index.next, 16384
2051  br i1 %loop, label %for.end, label %vector.body
2052
2053for.end:                                          ; preds = %vector.body
2054  ret void
2055
2056; SSE4-LABEL: test63:
2057; SSE4: pminuw
2058
2059; AVX1-LABEL: test63:
2060; AVX1: vpminuw
2061
2062; AVX2-LABEL: test63:
2063; AVX2: vpminuw
2064
2065; AVX512VL-LABEL: test63:
2066; AVX512VL: vpminuw
2067}
2068
; test64 - per-lane unsigned minimum of two <8 x i16> vectors, written as
; select (icmp uge a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 8 until %index.next equals 16384.
; The check lines below expect pminuw (vpminuw on the AVX runs).
2069define void @test64(i16* nocapture %a, i16* nocapture %b) nounwind {
2070vector.ph:
2071  br label %vector.body
2072
2073vector.body:                                      ; preds = %vector.body, %vector.ph
2074  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2075  %gep.a = getelementptr inbounds i16* %a, i64 %index
2076  %gep.b = getelementptr inbounds i16* %b, i64 %index
2077  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
2078  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
2079  %load.a = load <8 x i16>* %ptr.a, align 2
2080  %load.b = load <8 x i16>* %ptr.b, align 2
2081  %cmp = icmp uge <8 x i16> %load.a, %load.b
2082  %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a ; %load.b where a >= b (unsigned), else %load.a
2083  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
2084  %index.next = add i64 %index, 8
2085  %loop = icmp eq i64 %index.next, 16384
2086  br i1 %loop, label %for.end, label %vector.body
2087
2088for.end:                                          ; preds = %vector.body
2089  ret void
2090
2091; SSE4-LABEL: test64:
2092; SSE4: pminuw
2093
2094; AVX1-LABEL: test64:
2095; AVX1: vpminuw
2096
2097; AVX2-LABEL: test64:
2098; AVX2: vpminuw
2099
2100; AVX512VL-LABEL: test64:
2101; AVX512VL: vpminuw
2102}
2103
; test65 - per-lane signed maximum of two <4 x i32> vectors, written as
; select (icmp slt a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 4 until %index.next equals 16384.
; The check lines below expect pmaxsd (vpmaxsd on the AVX runs).
2104define void @test65(i32* nocapture %a, i32* nocapture %b) nounwind {
2105vector.ph:
2106  br label %vector.body
2107
2108vector.body:                                      ; preds = %vector.body, %vector.ph
2109  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2110  %gep.a = getelementptr inbounds i32* %a, i64 %index
2111  %gep.b = getelementptr inbounds i32* %b, i64 %index
2112  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2113  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
2114  %load.a = load <4 x i32>* %ptr.a, align 2
2115  %load.b = load <4 x i32>* %ptr.b, align 2
2116  %cmp = icmp slt <4 x i32> %load.a, %load.b
2117  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a ; %load.b where a < b (signed), else %load.a
2118  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
2119  %index.next = add i64 %index, 4
2120  %loop = icmp eq i64 %index.next, 16384
2121  br i1 %loop, label %for.end, label %vector.body
2122
2123for.end:                                          ; preds = %vector.body
2124  ret void
2125
2126; SSE4-LABEL: test65:
2127; SSE4: pmaxsd
2128
2129; AVX1-LABEL: test65:
2130; AVX1: vpmaxsd
2131
2132; AVX2-LABEL: test65:
2133; AVX2: vpmaxsd
2134
2135; AVX512VL-LABEL: test65:
2136; AVX512VL: vpmaxsd
2137}
2138
; test66 - per-lane signed maximum of two <4 x i32> vectors, written as
; select (icmp sle a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 4 until %index.next equals 16384.
; The check lines below expect pmaxsd (vpmaxsd on the AVX runs).
2139define void @test66(i32* nocapture %a, i32* nocapture %b) nounwind {
2140vector.ph:
2141  br label %vector.body
2142
2143vector.body:                                      ; preds = %vector.body, %vector.ph
2144  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2145  %gep.a = getelementptr inbounds i32* %a, i64 %index
2146  %gep.b = getelementptr inbounds i32* %b, i64 %index
2147  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2148  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
2149  %load.a = load <4 x i32>* %ptr.a, align 2
2150  %load.b = load <4 x i32>* %ptr.b, align 2
2151  %cmp = icmp sle <4 x i32> %load.a, %load.b
2152  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a ; %load.b where a <= b (signed), else %load.a
2153  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
2154  %index.next = add i64 %index, 4
2155  %loop = icmp eq i64 %index.next, 16384
2156  br i1 %loop, label %for.end, label %vector.body
2157
2158for.end:                                          ; preds = %vector.body
2159  ret void
2160
2161; SSE4-LABEL: test66:
2162; SSE4: pmaxsd
2163
2164; AVX1-LABEL: test66:
2165; AVX1: vpmaxsd
2166
2167; AVX2-LABEL: test66:
2168; AVX2: vpmaxsd
2169
2170; AVX512VL-LABEL: test66:
2171; AVX512VL: vpmaxsd
2172}
2173
; test67 - per-lane signed minimum of two <4 x i32> vectors, written as
; select (icmp sgt a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 4 until %index.next equals 16384.
; The check lines below expect pminsd (vpminsd on the AVX runs).
2174define void @test67(i32* nocapture %a, i32* nocapture %b) nounwind {
2175vector.ph:
2176  br label %vector.body
2177
2178vector.body:                                      ; preds = %vector.body, %vector.ph
2179  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2180  %gep.a = getelementptr inbounds i32* %a, i64 %index
2181  %gep.b = getelementptr inbounds i32* %b, i64 %index
2182  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2183  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
2184  %load.a = load <4 x i32>* %ptr.a, align 2
2185  %load.b = load <4 x i32>* %ptr.b, align 2
2186  %cmp = icmp sgt <4 x i32> %load.a, %load.b
2187  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a ; %load.b where a > b (signed), else %load.a
2188  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
2189  %index.next = add i64 %index, 4
2190  %loop = icmp eq i64 %index.next, 16384
2191  br i1 %loop, label %for.end, label %vector.body
2192
2193for.end:                                          ; preds = %vector.body
2194  ret void
2195
2196; SSE4-LABEL: test67:
2197; SSE4: pminsd
2198
2199; AVX1-LABEL: test67:
2200; AVX1: vpminsd
2201
2202; AVX2-LABEL: test67:
2203; AVX2: vpminsd
2204
2205; AVX512VL-LABEL: test67:
2206; AVX512VL: vpminsd
2207}
2208
; test68 - per-lane signed minimum of two <4 x i32> vectors, written as
; select (icmp sge a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 4 until %index.next equals 16384.
; The check lines below expect pminsd (vpminsd on the AVX runs).
2209define void @test68(i32* nocapture %a, i32* nocapture %b) nounwind {
2210vector.ph:
2211  br label %vector.body
2212
2213vector.body:                                      ; preds = %vector.body, %vector.ph
2214  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2215  %gep.a = getelementptr inbounds i32* %a, i64 %index
2216  %gep.b = getelementptr inbounds i32* %b, i64 %index
2217  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2218  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
2219  %load.a = load <4 x i32>* %ptr.a, align 2
2220  %load.b = load <4 x i32>* %ptr.b, align 2
2221  %cmp = icmp sge <4 x i32> %load.a, %load.b
2222  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a ; %load.b where a >= b (signed), else %load.a
2223  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
2224  %index.next = add i64 %index, 4
2225  %loop = icmp eq i64 %index.next, 16384
2226  br i1 %loop, label %for.end, label %vector.body
2227
2228for.end:                                          ; preds = %vector.body
2229  ret void
2230
2231; SSE4-LABEL: test68:
2232; SSE4: pminsd
2233
2234; AVX1-LABEL: test68:
2235; AVX1: vpminsd
2236
2237; AVX2-LABEL: test68:
2238; AVX2: vpminsd
2239
2240; AVX512VL-LABEL: test68:
2241; AVX512VL: vpminsd
2242}
2243
; test69 - per-lane unsigned maximum of two <4 x i32> vectors, written as
; select (icmp ult a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 4 until %index.next equals 16384.
; The check lines below expect pmaxud (vpmaxud on the AVX runs).
2244define void @test69(i32* nocapture %a, i32* nocapture %b) nounwind {
2245vector.ph:
2246  br label %vector.body
2247
2248vector.body:                                      ; preds = %vector.body, %vector.ph
2249  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2250  %gep.a = getelementptr inbounds i32* %a, i64 %index
2251  %gep.b = getelementptr inbounds i32* %b, i64 %index
2252  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2253  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
2254  %load.a = load <4 x i32>* %ptr.a, align 2
2255  %load.b = load <4 x i32>* %ptr.b, align 2
2256  %cmp = icmp ult <4 x i32> %load.a, %load.b
2257  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a ; %load.b where a < b (unsigned), else %load.a
2258  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
2259  %index.next = add i64 %index, 4
2260  %loop = icmp eq i64 %index.next, 16384
2261  br i1 %loop, label %for.end, label %vector.body
2262
2263for.end:                                          ; preds = %vector.body
2264  ret void
2265
2266; SSE4-LABEL: test69:
2267; SSE4: pmaxud
2268
2269; AVX1-LABEL: test69:
2270; AVX1: vpmaxud
2271
2272; AVX2-LABEL: test69:
2273; AVX2: vpmaxud
2274
2275; AVX512VL-LABEL: test69:
2276; AVX512VL: vpmaxud
2277}
2278
; test70 - per-lane unsigned maximum of two <4 x i32> vectors, written as
; select (icmp ule a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 4 until %index.next equals 16384.
; The check lines below expect pmaxud (vpmaxud on the AVX runs).
2279define void @test70(i32* nocapture %a, i32* nocapture %b) nounwind {
2280vector.ph:
2281  br label %vector.body
2282
2283vector.body:                                      ; preds = %vector.body, %vector.ph
2284  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2285  %gep.a = getelementptr inbounds i32* %a, i64 %index
2286  %gep.b = getelementptr inbounds i32* %b, i64 %index
2287  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2288  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
2289  %load.a = load <4 x i32>* %ptr.a, align 2
2290  %load.b = load <4 x i32>* %ptr.b, align 2
2291  %cmp = icmp ule <4 x i32> %load.a, %load.b
2292  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a ; %load.b where a <= b (unsigned), else %load.a
2293  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
2294  %index.next = add i64 %index, 4
2295  %loop = icmp eq i64 %index.next, 16384
2296  br i1 %loop, label %for.end, label %vector.body
2297
2298for.end:                                          ; preds = %vector.body
2299  ret void
2300
2301; SSE4-LABEL: test70:
2302; SSE4: pmaxud
2303
2304; AVX1-LABEL: test70:
2305; AVX1: vpmaxud
2306
2307; AVX2-LABEL: test70:
2308; AVX2: vpmaxud
2309
2310; AVX512VL-LABEL: test70:
2311; AVX512VL: vpmaxud
2312}
2313
; test71 - per-lane unsigned minimum of two <4 x i32> vectors, written as
; select (icmp ugt a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 4 until %index.next equals 16384.
; The check lines below expect pminud (vpminud on the AVX runs).
2314define void @test71(i32* nocapture %a, i32* nocapture %b) nounwind {
2315vector.ph:
2316  br label %vector.body
2317
2318vector.body:                                      ; preds = %vector.body, %vector.ph
2319  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2320  %gep.a = getelementptr inbounds i32* %a, i64 %index
2321  %gep.b = getelementptr inbounds i32* %b, i64 %index
2322  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2323  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
2324  %load.a = load <4 x i32>* %ptr.a, align 2
2325  %load.b = load <4 x i32>* %ptr.b, align 2
2326  %cmp = icmp ugt <4 x i32> %load.a, %load.b
2327  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a ; %load.b where a > b (unsigned), else %load.a
2328  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
2329  %index.next = add i64 %index, 4
2330  %loop = icmp eq i64 %index.next, 16384
2331  br i1 %loop, label %for.end, label %vector.body
2332
2333for.end:                                          ; preds = %vector.body
2334  ret void
2335
2336; SSE4-LABEL: test71:
2337; SSE4: pminud
2338
2339; AVX1-LABEL: test71:
2340; AVX1: vpminud
2341
2342; AVX2-LABEL: test71:
2343; AVX2: vpminud
2344
2345; AVX512VL-LABEL: test71:
2346; AVX512VL: vpminud
2347}
2348
; test72 - per-lane unsigned minimum of two <4 x i32> vectors, written as
; select (icmp uge a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 4 until %index.next equals 16384.
; The check lines below expect pminud (vpminud on the AVX runs).
2349define void @test72(i32* nocapture %a, i32* nocapture %b) nounwind {
2350vector.ph:
2351  br label %vector.body
2352
2353vector.body:                                      ; preds = %vector.body, %vector.ph
2354  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2355  %gep.a = getelementptr inbounds i32* %a, i64 %index
2356  %gep.b = getelementptr inbounds i32* %b, i64 %index
2357  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
2358  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
2359  %load.a = load <4 x i32>* %ptr.a, align 2
2360  %load.b = load <4 x i32>* %ptr.b, align 2
2361  %cmp = icmp uge <4 x i32> %load.a, %load.b
2362  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a ; %load.b where a >= b (unsigned), else %load.a
2363  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
2364  %index.next = add i64 %index, 4
2365  %loop = icmp eq i64 %index.next, 16384
2366  br i1 %loop, label %for.end, label %vector.body
2367
2368for.end:                                          ; preds = %vector.body
2369  ret void
2370
2371; SSE4-LABEL: test72:
2372; SSE4: pminud
2373
2374; AVX1-LABEL: test72:
2375; AVX1: vpminud
2376
2377; AVX2-LABEL: test72:
2378; AVX2: vpminud
2379
2380; AVX512VL-LABEL: test72:
2381; AVX512VL: vpminud
2382}
2383
; test73 - per-lane signed maximum of two <32 x i8> vectors, written as
; select (icmp slt a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 32 until %index.next equals 16384.
; The check lines below expect vpmaxsb.
2384define void @test73(i8* nocapture %a, i8* nocapture %b) nounwind {
2385vector.ph:
2386  br label %vector.body
2387
2388vector.body:                                      ; preds = %vector.body, %vector.ph
2389  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2390  %gep.a = getelementptr inbounds i8* %a, i64 %index
2391  %gep.b = getelementptr inbounds i8* %b, i64 %index
2392  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2393  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
2394  %load.a = load <32 x i8>* %ptr.a, align 2
2395  %load.b = load <32 x i8>* %ptr.b, align 2
2396  %cmp = icmp slt <32 x i8> %load.a, %load.b
2397  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a ; %load.b where a < b (signed), else %load.a
2398  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
2399  %index.next = add i64 %index, 32
2400  %loop = icmp eq i64 %index.next, 16384
2401  br i1 %loop, label %for.end, label %vector.body
2402
2403for.end:                                          ; preds = %vector.body
2404  ret void
2405
2406; AVX2-LABEL: test73:
2407; AVX2: vpmaxsb
2408
2409; AVX512VL-LABEL: test73:
2410; AVX512VL: vpmaxsb
2411}
2412
; test74 - per-lane signed maximum of two <32 x i8> vectors, written as
; select (icmp sle a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 32 until %index.next equals 16384.
; The check lines below expect vpmaxsb.
2413define void @test74(i8* nocapture %a, i8* nocapture %b) nounwind {
2414vector.ph:
2415  br label %vector.body
2416
2417vector.body:                                      ; preds = %vector.body, %vector.ph
2418  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2419  %gep.a = getelementptr inbounds i8* %a, i64 %index
2420  %gep.b = getelementptr inbounds i8* %b, i64 %index
2421  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2422  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
2423  %load.a = load <32 x i8>* %ptr.a, align 2
2424  %load.b = load <32 x i8>* %ptr.b, align 2
2425  %cmp = icmp sle <32 x i8> %load.a, %load.b
2426  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a ; %load.b where a <= b (signed), else %load.a
2427  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
2428  %index.next = add i64 %index, 32
2429  %loop = icmp eq i64 %index.next, 16384
2430  br i1 %loop, label %for.end, label %vector.body
2431
2432for.end:                                          ; preds = %vector.body
2433  ret void
2434
2435; AVX2-LABEL: test74:
2436; AVX2: vpmaxsb
2437
2438; AVX512VL-LABEL: test74:
2439; AVX512VL: vpmaxsb
2440}
2441
; test75 - per-lane signed minimum of two <32 x i8> vectors, written as
; select (icmp sgt a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 32 until %index.next equals 16384.
; The check lines below expect vpminsb.
2442define void @test75(i8* nocapture %a, i8* nocapture %b) nounwind {
2443vector.ph:
2444  br label %vector.body
2445
2446vector.body:                                      ; preds = %vector.body, %vector.ph
2447  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2448  %gep.a = getelementptr inbounds i8* %a, i64 %index
2449  %gep.b = getelementptr inbounds i8* %b, i64 %index
2450  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2451  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
2452  %load.a = load <32 x i8>* %ptr.a, align 2
2453  %load.b = load <32 x i8>* %ptr.b, align 2
2454  %cmp = icmp sgt <32 x i8> %load.a, %load.b
2455  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a ; %load.b where a > b (signed), else %load.a
2456  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
2457  %index.next = add i64 %index, 32
2458  %loop = icmp eq i64 %index.next, 16384
2459  br i1 %loop, label %for.end, label %vector.body
2460
2461for.end:                                          ; preds = %vector.body
2462  ret void
2463
2464; AVX2-LABEL: test75:
2465; AVX2: vpminsb
2466
2467; AVX512VL-LABEL: test75:
2468; AVX512VL: vpminsb
2469}
2470
; test76 - per-lane signed minimum of two <32 x i8> vectors, written as
; select (icmp sge a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 32 until %index.next equals 16384.
; The check lines below expect vpminsb.
2471define void @test76(i8* nocapture %a, i8* nocapture %b) nounwind {
2472vector.ph:
2473  br label %vector.body
2474
2475vector.body:                                      ; preds = %vector.body, %vector.ph
2476  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2477  %gep.a = getelementptr inbounds i8* %a, i64 %index
2478  %gep.b = getelementptr inbounds i8* %b, i64 %index
2479  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2480  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
2481  %load.a = load <32 x i8>* %ptr.a, align 2
2482  %load.b = load <32 x i8>* %ptr.b, align 2
2483  %cmp = icmp sge <32 x i8> %load.a, %load.b
2484  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a ; %load.b where a >= b (signed), else %load.a
2485  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
2486  %index.next = add i64 %index, 32
2487  %loop = icmp eq i64 %index.next, 16384
2488  br i1 %loop, label %for.end, label %vector.body
2489
2490for.end:                                          ; preds = %vector.body
2491  ret void
2492
2493; AVX2-LABEL: test76:
2494; AVX2: vpminsb
2495
2496; AVX512VL-LABEL: test76:
2497; AVX512VL: vpminsb
2498}
2499
; test77 - per-lane unsigned maximum of two <32 x i8> vectors, written as
; select (icmp ult a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 32 until %index.next equals 16384.
; The check lines below expect vpmaxub.
2500define void @test77(i8* nocapture %a, i8* nocapture %b) nounwind {
2501vector.ph:
2502  br label %vector.body
2503
2504vector.body:                                      ; preds = %vector.body, %vector.ph
2505  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2506  %gep.a = getelementptr inbounds i8* %a, i64 %index
2507  %gep.b = getelementptr inbounds i8* %b, i64 %index
2508  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2509  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
2510  %load.a = load <32 x i8>* %ptr.a, align 2
2511  %load.b = load <32 x i8>* %ptr.b, align 2
2512  %cmp = icmp ult <32 x i8> %load.a, %load.b
2513  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a ; %load.b where a < b (unsigned), else %load.a
2514  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
2515  %index.next = add i64 %index, 32
2516  %loop = icmp eq i64 %index.next, 16384
2517  br i1 %loop, label %for.end, label %vector.body
2518
2519for.end:                                          ; preds = %vector.body
2520  ret void
2521
2522; AVX2-LABEL: test77:
2523; AVX2: vpmaxub
2524
2525; AVX512VL-LABEL: test77:
2526; AVX512VL: vpmaxub
2527}
2528
; test78 - per-lane unsigned maximum of two <32 x i8> vectors, written as
; select (icmp ule a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 32 until %index.next equals 16384.
; The check lines below expect vpmaxub.
2529define void @test78(i8* nocapture %a, i8* nocapture %b) nounwind {
2530vector.ph:
2531  br label %vector.body
2532
2533vector.body:                                      ; preds = %vector.body, %vector.ph
2534  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2535  %gep.a = getelementptr inbounds i8* %a, i64 %index
2536  %gep.b = getelementptr inbounds i8* %b, i64 %index
2537  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2538  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
2539  %load.a = load <32 x i8>* %ptr.a, align 2
2540  %load.b = load <32 x i8>* %ptr.b, align 2
2541  %cmp = icmp ule <32 x i8> %load.a, %load.b
2542  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a ; %load.b where a <= b (unsigned), else %load.a
2543  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
2544  %index.next = add i64 %index, 32
2545  %loop = icmp eq i64 %index.next, 16384
2546  br i1 %loop, label %for.end, label %vector.body
2547
2548for.end:                                          ; preds = %vector.body
2549  ret void
2550
2551; AVX2-LABEL: test78:
2552; AVX2: vpmaxub
2553
2554; AVX512VL-LABEL: test78:
2555; AVX512VL: vpmaxub
2556}
2557
; test79 - per-lane unsigned minimum of two <32 x i8> vectors, written as
; select (icmp ugt a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 32 until %index.next equals 16384.
; The check lines below expect vpminub.
2558define void @test79(i8* nocapture %a, i8* nocapture %b) nounwind {
2559vector.ph:
2560  br label %vector.body
2561
2562vector.body:                                      ; preds = %vector.body, %vector.ph
2563  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2564  %gep.a = getelementptr inbounds i8* %a, i64 %index
2565  %gep.b = getelementptr inbounds i8* %b, i64 %index
2566  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2567  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
2568  %load.a = load <32 x i8>* %ptr.a, align 2
2569  %load.b = load <32 x i8>* %ptr.b, align 2
2570  %cmp = icmp ugt <32 x i8> %load.a, %load.b
2571  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a ; %load.b where a > b (unsigned), else %load.a
2572  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
2573  %index.next = add i64 %index, 32
2574  %loop = icmp eq i64 %index.next, 16384
2575  br i1 %loop, label %for.end, label %vector.body
2576
2577for.end:                                          ; preds = %vector.body
2578  ret void
2579
2580; AVX2-LABEL: test79:
2581; AVX2: vpminub
2582
2583; AVX512VL-LABEL: test79:
2584; AVX512VL: vpminub
2585}
2586
; test80 - per-lane unsigned minimum of two <32 x i8> vectors, written as
; select (icmp uge a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 32 until %index.next equals 16384.
; The check lines below expect vpminub.
2587define void @test80(i8* nocapture %a, i8* nocapture %b) nounwind {
2588vector.ph:
2589  br label %vector.body
2590
2591vector.body:                                      ; preds = %vector.body, %vector.ph
2592  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2593  %gep.a = getelementptr inbounds i8* %a, i64 %index
2594  %gep.b = getelementptr inbounds i8* %b, i64 %index
2595  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
2596  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
2597  %load.a = load <32 x i8>* %ptr.a, align 2
2598  %load.b = load <32 x i8>* %ptr.b, align 2
2599  %cmp = icmp uge <32 x i8> %load.a, %load.b
2600  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a ; %load.b where a >= b (unsigned), else %load.a
2601  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
2602  %index.next = add i64 %index, 32
2603  %loop = icmp eq i64 %index.next, 16384
2604  br i1 %loop, label %for.end, label %vector.body
2605
2606for.end:                                          ; preds = %vector.body
2607  ret void
2608
2609; AVX2-LABEL: test80:
2610; AVX2: vpminub
2611
2612; AVX512VL-LABEL: test80:
2613; AVX512VL: vpminub
2614}
2615
; test81 - per-lane signed maximum of two <16 x i16> vectors, written as
; select (icmp slt a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 16 until %index.next equals 16384.
; The check lines below expect vpmaxsw.
2616define void @test81(i16* nocapture %a, i16* nocapture %b) nounwind {
2617vector.ph:
2618  br label %vector.body
2619
2620vector.body:                                      ; preds = %vector.body, %vector.ph
2621  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2622  %gep.a = getelementptr inbounds i16* %a, i64 %index
2623  %gep.b = getelementptr inbounds i16* %b, i64 %index
2624  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
2625  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
2626  %load.a = load <16 x i16>* %ptr.a, align 2
2627  %load.b = load <16 x i16>* %ptr.b, align 2
2628  %cmp = icmp slt <16 x i16> %load.a, %load.b
2629  %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a ; %load.b where a < b (signed), else %load.a
2630  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
2631  %index.next = add i64 %index, 16
2632  %loop = icmp eq i64 %index.next, 16384
2633  br i1 %loop, label %for.end, label %vector.body
2634
2635for.end:                                          ; preds = %vector.body
2636  ret void
2637
2638; AVX2-LABEL: test81:
2639; AVX2: vpmaxsw
2640
2641; AVX512VL-LABEL: test81:
2642; AVX512VL: vpmaxsw
2643}
2644
; test82 - per-lane signed maximum of two <16 x i16> vectors, written as
; select (icmp sle a, b), b, a. Each loop iteration loads one vector from %a
; and one from %b at %index, stores the selected lanes back through %ptr.a,
; and steps %index by 16 until %index.next equals 16384.
; The check lines below expect vpmaxsw.
2645define void @test82(i16* nocapture %a, i16* nocapture %b) nounwind {
2646vector.ph:
2647  br label %vector.body
2648
2649vector.body:                                      ; preds = %vector.body, %vector.ph
2650  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2651  %gep.a = getelementptr inbounds i16* %a, i64 %index
2652  %gep.b = getelementptr inbounds i16* %b, i64 %index
2653  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
2654  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
2655  %load.a = load <16 x i16>* %ptr.a, align 2
2656  %load.b = load <16 x i16>* %ptr.b, align 2
2657  %cmp = icmp sle <16 x i16> %load.a, %load.b
2658  %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a ; %load.b where a <= b (signed), else %load.a
2659  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
2660  %index.next = add i64 %index, 16
2661  %loop = icmp eq i64 %index.next, 16384
2662  br i1 %loop, label %for.end, label %vector.body
2663
2664for.end:                                          ; preds = %vector.body
2665  ret void
2666
2667; AVX2-LABEL: test82:
2668; AVX2: vpmaxsw
2669
2670; AVX512VL-LABEL: test82:
2671; AVX512VL: vpmaxsw
2672}
2673
; test83: per-lane signed minimum of two <16 x i16> vectors, written as
; select(a sgt b, b, a). The result is stored back through %a.
; Expected instruction: vpminsw.
define void @test83(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %a.vptr = bitcast i16* %a.elt to <16 x i16>*
  %b.vptr = bitcast i16* %b.elt to <16 x i16>*
  %va = load <16 x i16>* %a.vptr, align 2
  %vb = load <16 x i16>* %b.vptr, align 2
  %a.sgt.b = icmp sgt <16 x i16> %va, %vb
  %min = select <16 x i1> %a.sgt.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %min, <16 x i16>* %a.vptr, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test83:
; AVX2: vpminsw

; AVX512VL-LABEL: test83:
; AVX512VL: vpminsw
}
2702
; test84: per-lane signed minimum of two <16 x i16> vectors, written as
; select(a sge b, b, a). The result is stored back through %a.
; Expected instruction: vpminsw.
define void @test84(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %a.vptr = bitcast i16* %a.elt to <16 x i16>*
  %b.vptr = bitcast i16* %b.elt to <16 x i16>*
  %va = load <16 x i16>* %a.vptr, align 2
  %vb = load <16 x i16>* %b.vptr, align 2
  %a.sge.b = icmp sge <16 x i16> %va, %vb
  %min = select <16 x i1> %a.sge.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %min, <16 x i16>* %a.vptr, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test84:
; AVX2: vpminsw

; AVX512VL-LABEL: test84:
; AVX512VL: vpminsw
}
2731
; test85: per-lane unsigned maximum of two <16 x i16> vectors, written as
; select(a ult b, b, a). The result is stored back through %a.
; Expected instruction: vpmaxuw.
define void @test85(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %a.vptr = bitcast i16* %a.elt to <16 x i16>*
  %b.vptr = bitcast i16* %b.elt to <16 x i16>*
  %va = load <16 x i16>* %a.vptr, align 2
  %vb = load <16 x i16>* %b.vptr, align 2
  %a.ult.b = icmp ult <16 x i16> %va, %vb
  %max = select <16 x i1> %a.ult.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %max, <16 x i16>* %a.vptr, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test85:
; AVX2: vpmaxuw

; AVX512VL-LABEL: test85:
; AVX512VL: vpmaxuw
}
2760
; test86: per-lane unsigned maximum of two <16 x i16> vectors, written as
; select(a ule b, b, a). The result is stored back through %a.
; Expected instruction: vpmaxuw.
define void @test86(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %a.vptr = bitcast i16* %a.elt to <16 x i16>*
  %b.vptr = bitcast i16* %b.elt to <16 x i16>*
  %va = load <16 x i16>* %a.vptr, align 2
  %vb = load <16 x i16>* %b.vptr, align 2
  %a.ule.b = icmp ule <16 x i16> %va, %vb
  %max = select <16 x i1> %a.ule.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %max, <16 x i16>* %a.vptr, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test86:
; AVX2: vpmaxuw

; AVX512VL-LABEL: test86:
; AVX512VL: vpmaxuw
}
2789
; test87: per-lane unsigned minimum of two <16 x i16> vectors, written as
; select(a ugt b, b, a). The result is stored back through %a.
; Expected instruction: vpminuw.
define void @test87(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %a.vptr = bitcast i16* %a.elt to <16 x i16>*
  %b.vptr = bitcast i16* %b.elt to <16 x i16>*
  %va = load <16 x i16>* %a.vptr, align 2
  %vb = load <16 x i16>* %b.vptr, align 2
  %a.ugt.b = icmp ugt <16 x i16> %va, %vb
  %min = select <16 x i1> %a.ugt.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %min, <16 x i16>* %a.vptr, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test87:
; AVX2: vpminuw

; AVX512VL-LABEL: test87:
; AVX512VL: vpminuw
}
2818
; test88: per-lane unsigned minimum of two <16 x i16> vectors, written as
; select(a uge b, b, a). The result is stored back through %a.
; Expected instruction: vpminuw.
define void @test88(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i16* %a, i64 %iv
  %b.elt = getelementptr inbounds i16* %b, i64 %iv
  %a.vptr = bitcast i16* %a.elt to <16 x i16>*
  %b.vptr = bitcast i16* %b.elt to <16 x i16>*
  %va = load <16 x i16>* %a.vptr, align 2
  %vb = load <16 x i16>* %b.vptr, align 2
  %a.uge.b = icmp uge <16 x i16> %va, %vb
  %min = select <16 x i1> %a.uge.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %min, <16 x i16>* %a.vptr, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test88:
; AVX2: vpminuw

; AVX512VL-LABEL: test88:
; AVX512VL: vpminuw
}
2847
; test89: per-lane signed maximum of two <8 x i32> vectors, written as
; select(a slt b, b, a). The result is stored back through %a.
; Expected instruction: vpmaxsd.
define void @test89(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %a.vptr = bitcast i32* %a.elt to <8 x i32>*
  %b.vptr = bitcast i32* %b.elt to <8 x i32>*
  %va = load <8 x i32>* %a.vptr, align 2
  %vb = load <8 x i32>* %b.vptr, align 2
  %a.slt.b = icmp slt <8 x i32> %va, %vb
  %max = select <8 x i1> %a.slt.b, <8 x i32> %vb, <8 x i32> %va
  store <8 x i32> %max, <8 x i32>* %a.vptr, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test89:
; AVX2: vpmaxsd

; AVX512VL-LABEL: test89:
; AVX512VL: vpmaxsd
}
2876
; test90: per-lane signed maximum of two <8 x i32> vectors, written as
; select(a sle b, b, a). The result is stored back through %a.
; Expected instruction: vpmaxsd.
define void @test90(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %a.vptr = bitcast i32* %a.elt to <8 x i32>*
  %b.vptr = bitcast i32* %b.elt to <8 x i32>*
  %va = load <8 x i32>* %a.vptr, align 2
  %vb = load <8 x i32>* %b.vptr, align 2
  %a.sle.b = icmp sle <8 x i32> %va, %vb
  %max = select <8 x i1> %a.sle.b, <8 x i32> %vb, <8 x i32> %va
  store <8 x i32> %max, <8 x i32>* %a.vptr, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test90:
; AVX2: vpmaxsd

; AVX512VL-LABEL: test90:
; AVX512VL: vpmaxsd
}
2905
; test91: per-lane signed minimum of two <8 x i32> vectors, written as
; select(a sgt b, b, a). The result is stored back through %a.
; Expected instruction: vpminsd.
define void @test91(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %a.vptr = bitcast i32* %a.elt to <8 x i32>*
  %b.vptr = bitcast i32* %b.elt to <8 x i32>*
  %va = load <8 x i32>* %a.vptr, align 2
  %vb = load <8 x i32>* %b.vptr, align 2
  %a.sgt.b = icmp sgt <8 x i32> %va, %vb
  %min = select <8 x i1> %a.sgt.b, <8 x i32> %vb, <8 x i32> %va
  store <8 x i32> %min, <8 x i32>* %a.vptr, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test91:
; AVX2: vpminsd

; AVX512VL-LABEL: test91:
; AVX512VL: vpminsd
}
2934
; test92: per-lane signed minimum of two <8 x i32> vectors, written as
; select(a sge b, b, a). The result is stored back through %a.
; Expected instruction: vpminsd.
define void @test92(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %a.vptr = bitcast i32* %a.elt to <8 x i32>*
  %b.vptr = bitcast i32* %b.elt to <8 x i32>*
  %va = load <8 x i32>* %a.vptr, align 2
  %vb = load <8 x i32>* %b.vptr, align 2
  %a.sge.b = icmp sge <8 x i32> %va, %vb
  %min = select <8 x i1> %a.sge.b, <8 x i32> %vb, <8 x i32> %va
  store <8 x i32> %min, <8 x i32>* %a.vptr, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test92:
; AVX2: vpminsd

; AVX512VL-LABEL: test92:
; AVX512VL: vpminsd
}
2963
; test93: per-lane unsigned maximum of two <8 x i32> vectors, written as
; select(a ult b, b, a). The result is stored back through %a.
; Expected instruction: vpmaxud.
define void @test93(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %a.vptr = bitcast i32* %a.elt to <8 x i32>*
  %b.vptr = bitcast i32* %b.elt to <8 x i32>*
  %va = load <8 x i32>* %a.vptr, align 2
  %vb = load <8 x i32>* %b.vptr, align 2
  %a.ult.b = icmp ult <8 x i32> %va, %vb
  %max = select <8 x i1> %a.ult.b, <8 x i32> %vb, <8 x i32> %va
  store <8 x i32> %max, <8 x i32>* %a.vptr, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test93:
; AVX2: vpmaxud

; AVX512VL-LABEL: test93:
; AVX512VL: vpmaxud
}
2992
; test94: per-lane unsigned maximum of two <8 x i32> vectors, written as
; select(a ule b, b, a). The result is stored back through %a.
; Expected instruction: vpmaxud.
define void @test94(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %a.vptr = bitcast i32* %a.elt to <8 x i32>*
  %b.vptr = bitcast i32* %b.elt to <8 x i32>*
  %va = load <8 x i32>* %a.vptr, align 2
  %vb = load <8 x i32>* %b.vptr, align 2
  %a.ule.b = icmp ule <8 x i32> %va, %vb
  %max = select <8 x i1> %a.ule.b, <8 x i32> %vb, <8 x i32> %va
  store <8 x i32> %max, <8 x i32>* %a.vptr, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test94:
; AVX2: vpmaxud

; AVX512VL-LABEL: test94:
; AVX512VL: vpmaxud
}
3021
; test95: per-lane unsigned minimum of two <8 x i32> vectors, written as
; select(a ugt b, b, a). The result is stored back through %a.
; Expected instruction: vpminud.
define void @test95(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %a.vptr = bitcast i32* %a.elt to <8 x i32>*
  %b.vptr = bitcast i32* %b.elt to <8 x i32>*
  %va = load <8 x i32>* %a.vptr, align 2
  %vb = load <8 x i32>* %b.vptr, align 2
  %a.ugt.b = icmp ugt <8 x i32> %va, %vb
  %min = select <8 x i1> %a.ugt.b, <8 x i32> %vb, <8 x i32> %va
  store <8 x i32> %min, <8 x i32>* %a.vptr, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test95:
; AVX2: vpminud

; AVX512VL-LABEL: test95:
; AVX512VL: vpminud
}
3050
; test96: per-lane unsigned minimum of two <8 x i32> vectors, written as
; select(a uge b, b, a). The result is stored back through %a.
; Expected instruction: vpminud.
define void @test96(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a.elt = getelementptr inbounds i32* %a, i64 %iv
  %b.elt = getelementptr inbounds i32* %b, i64 %iv
  %a.vptr = bitcast i32* %a.elt to <8 x i32>*
  %b.vptr = bitcast i32* %b.elt to <8 x i32>*
  %va = load <8 x i32>* %a.vptr, align 2
  %vb = load <8 x i32>* %b.vptr, align 2
  %a.uge.b = icmp uge <8 x i32> %va, %vb
  %min = select <8 x i1> %a.uge.b, <8 x i32> %vb, <8 x i32> %va
  store <8 x i32> %min, <8 x i32>* %a.vptr, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test96:
; AVX2: vpminud

; AVX512VL-LABEL: test96:
; AVX512VL: vpminud
}
3079
3080; ----------------------------
3081
; test97: per-lane signed minimum of two <64 x i8> vectors, written as
; select(a slt b, a, b). The result is stored back through %a.
; Expected instruction: vpminsb.
define void @test97(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp slt <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by the full vector width (64 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test97:
; AVX512BW: vpminsb {{.*}}
}
3107
; test98: per-lane signed minimum of two <64 x i8> vectors, written as
; select(a sle b, a, b). The result is stored back through %a.
; Expected instruction: vpminsb.
define void @test98(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp sle <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by the full vector width (64 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test98:
; AVX512BW: vpminsb {{.*}}
}
3133
; test99: per-lane signed maximum of two <64 x i8> vectors, written as
; select(a sgt b, a, b). The result is stored back through %a.
; Expected instruction: vpmaxsb.
define void @test99(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp sgt <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by the full vector width (64 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test99:
; AVX512BW: vpmaxsb {{.*}}
}
3159
; test100: per-lane signed maximum of two <64 x i8> vectors, written as
; select(a sge b, a, b). The result is stored back through %a.
; Expected instruction: vpmaxsb.
define void @test100(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp sge <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by the full vector width (64 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test100:
; AVX512BW: vpmaxsb {{.*}}
}
3185
; test101: per-lane unsigned minimum of two <64 x i8> vectors, written as
; select(a ult b, a, b). The result is stored back through %a.
; Expected instruction: vpminub.
define void @test101(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp ult <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by the full vector width (64 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test101:
; AVX512BW: vpminub {{.*}}
}
3211
; test102: per-lane unsigned minimum of two <64 x i8> vectors, written as
; select(a ule b, a, b). The result is stored back through %a.
; Expected instruction: vpminub.
define void @test102(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp ule <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by the full vector width (64 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test102:
; AVX512BW: vpminub {{.*}}
}
3237
; test103: per-lane unsigned maximum of two <64 x i8> vectors, written as
; select(a ugt b, a, b). The result is stored back through %a.
; Expected instruction: vpmaxub.
define void @test103(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp ugt <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by the full vector width (64 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test103:
; AVX512BW: vpmaxub {{.*}}
}
3263
; test104: per-lane unsigned maximum of two <64 x i8> vectors, written as
; select(a uge b, a, b). The result is stored back through %a.
; Expected instruction: vpmaxub.
define void @test104(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp uge <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by the full vector width (64 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test104:
; AVX512BW: vpmaxub {{.*}}
}
3289
; test105: per-lane signed minimum of two <32 x i16> vectors, written as
; select(a slt b, a, b). The result is stored back through %a.
; Expected instruction: vpminsw.
define void @test105(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp slt <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by the full vector width (32 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test105:
; AVX512BW: vpminsw {{.*}}
}
3315
; test106: per-lane signed minimum of two <32 x i16> vectors, written as
; select(a sle b, a, b). The result is stored back through %a.
; Expected instruction: vpminsw.
define void @test106(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp sle <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by the full vector width (32 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test106:
; AVX512BW: vpminsw {{.*}}
}
3341
; test107: per-lane signed maximum of two <32 x i16> vectors, written as
; select(a sgt b, a, b). The result is stored back through %a.
; Expected instruction: vpmaxsw.
define void @test107(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp sgt <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by the full vector width (32 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test107:
; AVX512BW: vpmaxsw {{.*}}
}
3367
; test108: per-lane signed maximum of two <32 x i16> vectors, written as
; select(a sge b, a, b). The result is stored back through %a.
; Expected instruction: vpmaxsw.
define void @test108(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp sge <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by the full vector width (32 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test108:
; AVX512BW: vpmaxsw {{.*}}
}
3393
; test109: per-lane unsigned minimum of two <32 x i16> vectors, written as
; select(a ult b, a, b). The result is stored back through %a.
; Expected instruction: vpminuw.
define void @test109(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp ult <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by the full vector width (32 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test109:
; AVX512BW: vpminuw {{.*}}
}
3419
; test110: per-lane unsigned minimum of two <32 x i16> vectors, written as
; select(a ule b, a, b). The result is stored back through %a.
; Expected instruction: vpminuw.
define void @test110(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp ule <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by the full vector width (32 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test110:
; AVX512BW: vpminuw {{.*}}
}
3445
; test111: per-lane unsigned maximum of two <32 x i16> vectors, written as
; select(a ugt b, a, b). The result is stored back through %a.
; Expected instruction: vpmaxuw.
define void @test111(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp ugt <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by the full vector width (32 lanes) so iterations touch disjoint
  ; chunks and the last one stays within the 16384-element bound.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test111:
; AVX512BW: vpmaxuw {{.*}}
}
3471
; test112: 512-bit unsigned maximum on i16 lanes (non-strict compare).
; Each iteration loads one <32 x i16> vector from %a and %b, keeps the larger
; lane of each pair via select(icmp uge a, b), a, b, and stores the result back
; to %a. The skx (AVX-512BW) run is expected to fold the compare+select into a
; single vpmaxuw.
define void @test112(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp uge <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by one full vector (32 x i16) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test112:
; AVX512BW: vpmaxuw {{.*}}
}
3497
; test113: 512-bit signed minimum on i32 lanes.
; Each iteration loads one <16 x i32> vector from %a and %b, keeps the smaller
; lane of each pair via select(icmp slt a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpminsd.
define void @test113(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  %cmp = icmp slt <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Step by one full vector (16 x i32) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test113:
; AVX512F: vpminsd {{.*}}
}
3523
; test114: 512-bit signed minimum on i32 lanes (non-strict compare).
; Each iteration loads one <16 x i32> vector from %a and %b, keeps the smaller
; lane of each pair via select(icmp sle a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpminsd.
define void @test114(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  %cmp = icmp sle <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Step by one full vector (16 x i32) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test114:
; AVX512F: vpminsd {{.*}}
}
3549
; test115: 512-bit signed maximum on i32 lanes.
; Each iteration loads one <16 x i32> vector from %a and %b, keeps the larger
; lane of each pair via select(icmp sgt a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpmaxsd.
define void @test115(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  %cmp = icmp sgt <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Step by one full vector (16 x i32) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test115:
; AVX512F: vpmaxsd {{.*}}
}
3575
; test116: 512-bit signed maximum on i32 lanes (non-strict compare).
; Each iteration loads one <16 x i32> vector from %a and %b, keeps the larger
; lane of each pair via select(icmp sge a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpmaxsd.
define void @test116(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  %cmp = icmp sge <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Step by one full vector (16 x i32) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test116:
; AVX512F: vpmaxsd {{.*}}
}
3601
; test117: 512-bit unsigned minimum on i32 lanes.
; Each iteration loads one <16 x i32> vector from %a and %b, keeps the smaller
; lane of each pair via select(icmp ult a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpminud.
define void @test117(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  %cmp = icmp ult <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Step by one full vector (16 x i32) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test117:
; AVX512F: vpminud {{.*}}
}
3627
; test118: 512-bit unsigned minimum on i32 lanes (non-strict compare).
; Each iteration loads one <16 x i32> vector from %a and %b, keeps the smaller
; lane of each pair via select(icmp ule a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpminud.
define void @test118(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  %cmp = icmp ule <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Step by one full vector (16 x i32) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test118:
; AVX512F: vpminud {{.*}}
}
3653
; test119: 512-bit unsigned maximum on i32 lanes.
; Each iteration loads one <16 x i32> vector from %a and %b, keeps the larger
; lane of each pair via select(icmp ugt a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpmaxud.
define void @test119(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  %cmp = icmp ugt <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Step by one full vector (16 x i32) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test119:
; AVX512F: vpmaxud {{.*}}
}
3679
; test120: 512-bit unsigned maximum on i32 lanes (non-strict compare).
; Each iteration loads one <16 x i32> vector from %a and %b, keeps the larger
; lane of each pair via select(icmp uge a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpmaxud.
define void @test120(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  %cmp = icmp uge <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Step by one full vector (16 x i32) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test120:
; AVX512F: vpmaxud {{.*}}
}
3705
; test121: 512-bit signed minimum on i64 lanes.
; Each iteration loads one <8 x i64> vector from %a and %b, keeps the smaller
; lane of each pair via select(icmp slt a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpminsq.
; Note: the buffers are addressed through i32* pointers, so %index counts i32
; elements rather than i64 lanes.
define void @test121(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  %cmp = icmp slt <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; One <8 x i64> vector is 64 bytes, i.e. 16 i32 elements; step by 16 so
  ; consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test121:
; AVX512F: vpminsq {{.*}}
}
3731
; test122: 512-bit signed minimum on i64 lanes (non-strict compare).
; Each iteration loads one <8 x i64> vector from %a and %b, keeps the smaller
; lane of each pair via select(icmp sle a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpminsq.
; Note: the buffers are addressed through i32* pointers, so %index counts i32
; elements rather than i64 lanes.
define void @test122(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  %cmp = icmp sle <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; One <8 x i64> vector is 64 bytes, i.e. 16 i32 elements; step by 16 so
  ; consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test122:
; AVX512F: vpminsq {{.*}}
}
3757
; test123: 512-bit signed maximum on i64 lanes.
; Each iteration loads one <8 x i64> vector from %a and %b, keeps the larger
; lane of each pair via select(icmp sgt a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpmaxsq.
; Note: the buffers are addressed through i32* pointers, so %index counts i32
; elements rather than i64 lanes.
define void @test123(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  %cmp = icmp sgt <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; One <8 x i64> vector is 64 bytes, i.e. 16 i32 elements; step by 16 so
  ; consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test123:
; AVX512F: vpmaxsq {{.*}}
}
3783
; test124: 512-bit signed maximum on i64 lanes (non-strict compare).
; Each iteration loads one <8 x i64> vector from %a and %b, keeps the larger
; lane of each pair via select(icmp sge a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpmaxsq.
; Note: the buffers are addressed through i32* pointers, so %index counts i32
; elements rather than i64 lanes.
define void @test124(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  %cmp = icmp sge <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; One <8 x i64> vector is 64 bytes, i.e. 16 i32 elements; step by 16 so
  ; consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test124:
; AVX512F: vpmaxsq {{.*}}
}
3809
; test125: 512-bit unsigned minimum on i64 lanes.
; Each iteration loads one <8 x i64> vector from %a and %b, keeps the smaller
; lane of each pair via select(icmp ult a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpminuq.
; Note: the buffers are addressed through i32* pointers, so %index counts i32
; elements rather than i64 lanes.
define void @test125(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  %cmp = icmp ult <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; One <8 x i64> vector is 64 bytes, i.e. 16 i32 elements; step by 16 so
  ; consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test125:
; AVX512F: vpminuq {{.*}}
}
3835
; test126: 512-bit unsigned minimum on i64 lanes (non-strict compare).
; Each iteration loads one <8 x i64> vector from %a and %b, keeps the smaller
; lane of each pair via select(icmp ule a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpminuq.
; Note: the buffers are addressed through i32* pointers, so %index counts i32
; elements rather than i64 lanes.
define void @test126(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  %cmp = icmp ule <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; One <8 x i64> vector is 64 bytes, i.e. 16 i32 elements; step by 16 so
  ; consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test126:
; AVX512F: vpminuq {{.*}}
}
3861
; test127: 512-bit unsigned maximum on i64 lanes.
; Each iteration loads one <8 x i64> vector from %a and %b, keeps the larger
; lane of each pair via select(icmp ugt a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpmaxuq.
; Note: the buffers are addressed through i32* pointers, so %index counts i32
; elements rather than i64 lanes.
define void @test127(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  %cmp = icmp ugt <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; One <8 x i64> vector is 64 bytes, i.e. 16 i32 elements; step by 16 so
  ; consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test127:
; AVX512F: vpmaxuq {{.*}}
}
3887
; test128: 512-bit unsigned maximum on i64 lanes (non-strict compare).
; Each iteration loads one <8 x i64> vector from %a and %b, keeps the larger
; lane of each pair via select(icmp uge a, b), a, b, and stores the result back
; to %a. The AVX-512F runs (knl and skx) are expected to fold the
; compare+select into a single vpmaxuq.
; Note: the buffers are addressed through i32* pointers, so %index counts i32
; elements rather than i64 lanes.
define void @test128(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  %cmp = icmp uge <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; One <8 x i64> vector is 64 bytes, i.e. 16 i32 elements; step by 16 so
  ; consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test128:
; AVX512F: vpmaxuq {{.*}}
}
3913
; test129: 512-bit signed maximum on i8 lanes, written with swapped select
; operands. select(icmp slt a, b), b, a picks b when a < b, i.e. the larger
; lane, so the skx (AVX-512BW) run is expected to emit vpmaxsb. The result is
; stored back to %a.
define void @test129(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp slt <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by one full vector (64 x i8) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test129:
; AVX512BW: vpmaxsb
}
3939
; test130: 512-bit signed maximum on i8 lanes, written with swapped select
; operands. select(icmp sle a, b), b, a picks b when a <= b, i.e. the larger
; lane, so the skx (AVX-512BW) run is expected to emit vpmaxsb. The result is
; stored back to %a.
define void @test130(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp sle <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by one full vector (64 x i8) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test130:
; AVX512BW: vpmaxsb
}
3965
; test131: 512-bit signed minimum on i8 lanes, written with swapped select
; operands. select(icmp sgt a, b), b, a picks b when a > b, i.e. the smaller
; lane, so the skx (AVX-512BW) run is expected to emit vpminsb. The result is
; stored back to %a.
define void @test131(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp sgt <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by one full vector (64 x i8) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test131:
; AVX512BW: vpminsb
}
3991
; test132: 512-bit signed minimum on i8 lanes, written with swapped select
; operands. select(icmp sge a, b), b, a picks b when a >= b, i.e. the smaller
; lane, so the skx (AVX-512BW) run is expected to emit vpminsb. The result is
; stored back to %a.
define void @test132(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp sge <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by one full vector (64 x i8) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test132:
; AVX512BW: vpminsb
}
4017
; test133: 512-bit unsigned maximum on i8 lanes, written with swapped select
; operands. select(icmp ult a, b), b, a picks b when a < b, i.e. the larger
; lane, so the skx (AVX-512BW) run is expected to emit vpmaxub. The result is
; stored back to %a.
define void @test133(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp ult <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by one full vector (64 x i8) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test133:
; AVX512BW: vpmaxub
}
4043
; test134: 512-bit unsigned maximum on i8 lanes, written with swapped select
; operands. select(icmp ule a, b), b, a picks b when a <= b, i.e. the larger
; lane, so the skx (AVX-512BW) run is expected to emit vpmaxub. The result is
; stored back to %a.
define void @test134(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp ule <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by one full vector (64 x i8) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test134:
; AVX512BW: vpmaxub
}
4069
; test135: 512-bit unsigned minimum on i8 lanes, written with swapped select
; operands. select(icmp ugt a, b), b, a picks b when a > b, i.e. the smaller
; lane, so the skx (AVX-512BW) run is expected to emit vpminub. The result is
; stored back to %a.
define void @test135(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp ugt <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by one full vector (64 x i8) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test135:
; AVX512BW: vpminub
}
4095
; test136: 512-bit unsigned minimum on i8 lanes, written with swapped select
; operands. select(icmp uge a, b), b, a picks b when a >= b, i.e. the smaller
; lane, so the skx (AVX-512BW) run is expected to emit vpminub. The result is
; stored back to %a.
define void @test136(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
  %load.a = load <64 x i8>* %ptr.a, align 2
  %load.b = load <64 x i8>* %ptr.b, align 2
  %cmp = icmp uge <64 x i8> %load.a, %load.b
  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
  ; Step by one full vector (64 x i8) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 64
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test136:
; AVX512BW: vpminub
}
4121
; test137: 512-bit signed maximum on i16 lanes, written with swapped select
; operands. select(icmp slt a, b), b, a picks b when a < b, i.e. the larger
; lane, so the skx (AVX-512BW) run is expected to emit vpmaxsw. The result is
; stored back to %a.
define void @test137(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp slt <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by one full vector (32 x i16) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test137:
; AVX512BW: vpmaxsw
}
4147
; test138: 512-bit signed maximum on i16 lanes, written with swapped select
; operands. select(icmp sle a, b), b, a picks b when a <= b, i.e. the larger
; lane, so the skx (AVX-512BW) run is expected to emit vpmaxsw. The result is
; stored back to %a.
define void @test138(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp sle <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by one full vector (32 x i16) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test138:
; AVX512BW: vpmaxsw
}
4173
; test139: 512-bit signed minimum on i16 lanes, written with swapped select
; operands. select(icmp sgt a, b), b, a picks b when a > b, i.e. the smaller
; lane, so the skx (AVX-512BW) run is expected to emit vpminsw. The result is
; stored back to %a.
define void @test139(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp sgt <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by one full vector (32 x i16) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test139:
; AVX512BW: vpminsw
}
4199
; test140: 512-bit signed minimum on i16 lanes, written with swapped select
; operands. select(icmp sge a, b), b, a picks b when a >= b, i.e. the smaller
; lane, so the skx (AVX-512BW) run is expected to emit vpminsw. The result is
; stored back to %a.
define void @test140(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp sge <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by one full vector (32 x i16) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test140:
; AVX512BW: vpminsw
}
4225
; test141: 512-bit unsigned maximum on i16 lanes, written with swapped select
; operands. select(icmp ult a, b), b, a picks b when a < b, i.e. the larger
; lane, so the skx (AVX-512BW) run is expected to emit vpmaxuw. The result is
; stored back to %a.
define void @test141(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  %cmp = icmp ult <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Step by one full vector (32 x i16) so consecutive iterations do not overlap
  ; and the last one stays inside the 16384-element range.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test141:
; AVX512BW: vpmaxuw
}
4251
; @test142 - unsigned 16-bit maximum over <32 x i16>, written as
; select(a <= b, b, a). Each iteration loads one vector from %a and one from
; %b and stores the element-wise result back through the %a pointer.
; The AVX512BW check at the end of the function expects vpmaxuw.
define void @test142(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  ; a <= b (unsigned) selects b, otherwise a: the larger value per lane.
  %cmp = icmp ule <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Advance by the full vector width (32 i16 elements) so iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test142:
; AVX512BW: vpmaxuw
}
4277
; @test143 - unsigned 16-bit minimum over <32 x i16>, written as
; select(a > b, b, a). Each iteration loads one vector from %a and one from
; %b and stores the element-wise result back through the %a pointer.
; The AVX512BW check at the end of the function expects vpminuw.
define void @test143(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  ; a > b (unsigned) selects b, otherwise a: the smaller value per lane.
  %cmp = icmp ugt <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Advance by the full vector width (32 i16 elements) so iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test143:
; AVX512BW: vpminuw
}
4303
; @test144 - unsigned 16-bit minimum over <32 x i16>, written as
; select(a >= b, b, a). Each iteration loads one vector from %a and one from
; %b and stores the element-wise result back through the %a pointer.
; The AVX512BW check at the end of the function expects vpminuw.
define void @test144(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
  %load.a = load <32 x i16>* %ptr.a, align 2
  %load.b = load <32 x i16>* %ptr.b, align 2
  ; a >= b (unsigned) selects b, otherwise a: the smaller value per lane.
  %cmp = icmp uge <32 x i16> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
  ; Advance by the full vector width (32 i16 elements) so iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512BW-LABEL: test144:
; AVX512BW: vpminuw
}
4329
; @test145 - signed 32-bit maximum over <16 x i32>, written as
; select(a < b, b, a). Each iteration loads one vector from %a and one from
; %b and stores the element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpmaxsd.
define void @test145(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  ; a < b (signed) selects b, otherwise a: the larger value per lane.
  %cmp = icmp slt <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Advance by the full vector width (16 i32 elements) so iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test145:
; AVX512F: vpmaxsd
}
4355
; @test146 - signed 32-bit maximum over <16 x i32>, written as
; select(a <= b, b, a). Each iteration loads one vector from %a and one from
; %b and stores the element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpmaxsd.
define void @test146(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  ; a <= b (signed) selects b, otherwise a: the larger value per lane.
  %cmp = icmp sle <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Advance by the full vector width (16 i32 elements) so iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test146:
; AVX512F: vpmaxsd
}
4381
; @test147 - signed 32-bit minimum over <16 x i32>, written as
; select(a > b, b, a). Each iteration loads one vector from %a and one from
; %b and stores the element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpminsd.
define void @test147(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  ; a > b (signed) selects b, otherwise a: the smaller value per lane.
  %cmp = icmp sgt <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Advance by the full vector width (16 i32 elements) so iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test147:
; AVX512F: vpminsd
}
4407
; @test148 - signed 32-bit minimum over <16 x i32>, written as
; select(a >= b, b, a). Each iteration loads one vector from %a and one from
; %b and stores the element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpminsd.
define void @test148(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  ; a >= b (signed) selects b, otherwise a: the smaller value per lane.
  %cmp = icmp sge <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Advance by the full vector width (16 i32 elements) so iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test148:
; AVX512F: vpminsd
}
4433
; @test149 - unsigned 32-bit maximum over <16 x i32>, written as
; select(a < b, b, a). Each iteration loads one vector from %a and one from
; %b and stores the element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpmaxud.
define void @test149(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  ; a < b (unsigned) selects b, otherwise a: the larger value per lane.
  %cmp = icmp ult <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Advance by the full vector width (16 i32 elements) so iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test149:
; AVX512F: vpmaxud
}
4459
; @test150 - unsigned 32-bit maximum over <16 x i32>, written as
; select(a <= b, b, a). Each iteration loads one vector from %a and one from
; %b and stores the element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpmaxud.
define void @test150(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  ; a <= b (unsigned) selects b, otherwise a: the larger value per lane.
  %cmp = icmp ule <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Advance by the full vector width (16 i32 elements) so iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test150:
; AVX512F: vpmaxud
}
4485
; @test151 - unsigned 32-bit minimum over <16 x i32>, written as
; select(a > b, b, a). Each iteration loads one vector from %a and one from
; %b and stores the element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpminud.
define void @test151(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  ; a > b (unsigned) selects b, otherwise a: the smaller value per lane.
  %cmp = icmp ugt <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Advance by the full vector width (16 i32 elements) so iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test151:
; AVX512F: vpminud
}
4511
; @test152 - unsigned 32-bit minimum over <16 x i32>, written as
; select(a >= b, b, a). Each iteration loads one vector from %a and one from
; %b and stores the element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpminud.
define void @test152(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
  %load.a = load <16 x i32>* %ptr.a, align 2
  %load.b = load <16 x i32>* %ptr.b, align 2
  ; a >= b (unsigned) selects b, otherwise a: the smaller value per lane.
  %cmp = icmp uge <16 x i32> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
  ; Advance by the full vector width (16 i32 elements) so iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test152:
; AVX512F: vpminud
}
4537
4538; -----------------------
4539
; @test153 - signed 64-bit maximum over <8 x i64>, written as
; select(a < b, b, a). The arguments are i32 pointers that are bitcast to
; <8 x i64> pointers; each iteration loads one vector from each and stores the
; element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpmaxsq.
define void @test153(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  ; a < b (signed) selects b, otherwise a: the larger value per lane.
  %cmp = icmp slt <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; The GEP counts i32 elements and one <8 x i64> vector is 64 bytes, i.e.
  ; 16 of them; step by 16 so consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test153:
; AVX512F: vpmaxsq
}
4565
; @test154 - signed 64-bit maximum over <8 x i64>, written as
; select(a <= b, b, a). The arguments are i32 pointers that are bitcast to
; <8 x i64> pointers; each iteration loads one vector from each and stores the
; element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpmaxsq.
define void @test154(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  ; a <= b (signed) selects b, otherwise a: the larger value per lane.
  %cmp = icmp sle <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; The GEP counts i32 elements and one <8 x i64> vector is 64 bytes, i.e.
  ; 16 of them; step by 16 so consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test154:
; AVX512F: vpmaxsq
}
4591
; @test155 - signed 64-bit minimum over <8 x i64>, written as
; select(a > b, b, a). The arguments are i32 pointers that are bitcast to
; <8 x i64> pointers; each iteration loads one vector from each and stores the
; element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpminsq.
define void @test155(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  ; a > b (signed) selects b, otherwise a: the smaller value per lane.
  %cmp = icmp sgt <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; The GEP counts i32 elements and one <8 x i64> vector is 64 bytes, i.e.
  ; 16 of them; step by 16 so consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test155:
; AVX512F: vpminsq
}
4617
; @test156 - signed 64-bit minimum over <8 x i64>, written as
; select(a >= b, b, a). The arguments are i32 pointers that are bitcast to
; <8 x i64> pointers; each iteration loads one vector from each and stores the
; element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpminsq.
define void @test156(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  ; a >= b (signed) selects b, otherwise a: the smaller value per lane.
  %cmp = icmp sge <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; The GEP counts i32 elements and one <8 x i64> vector is 64 bytes, i.e.
  ; 16 of them; step by 16 so consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test156:
; AVX512F: vpminsq
}
4643
; @test157 - unsigned 64-bit maximum over <8 x i64>, written as
; select(a < b, b, a). The arguments are i32 pointers that are bitcast to
; <8 x i64> pointers; each iteration loads one vector from each and stores the
; element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpmaxuq.
define void @test157(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  ; a < b (unsigned) selects b, otherwise a: the larger value per lane.
  %cmp = icmp ult <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; The GEP counts i32 elements and one <8 x i64> vector is 64 bytes, i.e.
  ; 16 of them; step by 16 so consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test157:
; AVX512F: vpmaxuq
}
4669
; @test158 - unsigned 64-bit maximum over <8 x i64>, written as
; select(a <= b, b, a). The arguments are i32 pointers that are bitcast to
; <8 x i64> pointers; each iteration loads one vector from each and stores the
; element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpmaxuq.
define void @test158(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  ; a <= b (unsigned) selects b, otherwise a: the larger value per lane.
  %cmp = icmp ule <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; The GEP counts i32 elements and one <8 x i64> vector is 64 bytes, i.e.
  ; 16 of them; step by 16 so consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test158:
; AVX512F: vpmaxuq
}
4695
; @test159 - unsigned 64-bit minimum over <8 x i64>, written as
; select(a > b, b, a). The arguments are i32 pointers that are bitcast to
; <8 x i64> pointers; each iteration loads one vector from each and stores the
; element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpminuq.
define void @test159(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  ; a > b (unsigned) selects b, otherwise a: the smaller value per lane.
  %cmp = icmp ugt <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; The GEP counts i32 elements and one <8 x i64> vector is 64 bytes, i.e.
  ; 16 of them; step by 16 so consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test159:
; AVX512F: vpminuq
}
4721
; @test160 - unsigned 64-bit minimum over <8 x i64>, written as
; select(a >= b, b, a). The arguments are i32 pointers that are bitcast to
; <8 x i64> pointers; each iteration loads one vector from each and stores the
; element-wise result back through the %a pointer.
; The AVX512F check at the end of the function expects vpminuq.
define void @test160(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
  %load.a = load <8 x i64>* %ptr.a, align 2
  %load.b = load <8 x i64>* %ptr.b, align 2
  ; a >= b (unsigned) selects b, otherwise a: the smaller value per lane.
  %cmp = icmp uge <8 x i64> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
  ; The GEP counts i32 elements and one <8 x i64> vector is 64 bytes, i.e.
  ; 16 of them; step by 16 so consecutive iterations do not overlap.
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX512F-LABEL: test160:
; AVX512F: vpminuq
}
4747
; @test161 - signed 64-bit minimum over <4 x i64>, written as
; select(a < b, a, b). The i32 pointer arguments are bitcast to <4 x i64>
; pointers and the result is stored back through the %a pointer.
; The AVX512VL check at the end of the function expects vpminsq.
define void @test161(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address of the current vector in each input.
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <4 x i64>*
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  %rhs = load <4 x i64>* %b.vec, align 2
  %mask = icmp slt <4 x i64> %lhs, %rhs
  %pick = select <4 x i1> %mask, <4 x i64> %lhs, <4 x i64> %rhs
  store <4 x i64> %pick, <4 x i64>* %a.vec, align 2
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test161:
; AVX512VL: vpminsq
}
4773
; @test162 - signed 64-bit minimum over <4 x i64>, written as
; select(a <= b, a, b). The i32 pointer arguments are bitcast to <4 x i64>
; pointers and the result is stored back through the %a pointer.
; The AVX512VL check at the end of the function expects vpminsq.
define void @test162(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address of the current vector in each input.
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <4 x i64>*
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  %rhs = load <4 x i64>* %b.vec, align 2
  %mask = icmp sle <4 x i64> %lhs, %rhs
  %pick = select <4 x i1> %mask, <4 x i64> %lhs, <4 x i64> %rhs
  store <4 x i64> %pick, <4 x i64>* %a.vec, align 2
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test162:
; AVX512VL: vpminsq
}
4799
; @test163 - signed 64-bit maximum over <4 x i64>, written as
; select(a > b, a, b). The i32 pointer arguments are bitcast to <4 x i64>
; pointers and the result is stored back through the %a pointer.
; The AVX512VL check at the end of the function expects vpmaxsq.
define void @test163(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address of the current vector in each input.
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <4 x i64>*
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  %rhs = load <4 x i64>* %b.vec, align 2
  %mask = icmp sgt <4 x i64> %lhs, %rhs
  %pick = select <4 x i1> %mask, <4 x i64> %lhs, <4 x i64> %rhs
  store <4 x i64> %pick, <4 x i64>* %a.vec, align 2
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test163:
; AVX512VL: vpmaxsq
}
4825
; @test164 - signed 64-bit maximum over <4 x i64>, written as
; select(a >= b, a, b). The i32 pointer arguments are bitcast to <4 x i64>
; pointers and the result is stored back through the %a pointer.
; The AVX512VL check at the end of the function expects vpmaxsq.
define void @test164(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address of the current vector in each input.
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <4 x i64>*
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  %rhs = load <4 x i64>* %b.vec, align 2
  %mask = icmp sge <4 x i64> %lhs, %rhs
  %pick = select <4 x i1> %mask, <4 x i64> %lhs, <4 x i64> %rhs
  store <4 x i64> %pick, <4 x i64>* %a.vec, align 2
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test164:
; AVX512VL: vpmaxsq
}
4851
; @test165 - unsigned 64-bit minimum over <4 x i64>, written as
; select(a < b, a, b). The i32 pointer arguments are bitcast to <4 x i64>
; pointers and the result is stored back through the %a pointer.
; The AVX512VL check at the end of the function expects vpminuq.
define void @test165(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address of the current vector in each input.
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <4 x i64>*
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  %rhs = load <4 x i64>* %b.vec, align 2
  %mask = icmp ult <4 x i64> %lhs, %rhs
  %pick = select <4 x i1> %mask, <4 x i64> %lhs, <4 x i64> %rhs
  store <4 x i64> %pick, <4 x i64>* %a.vec, align 2
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test165:
; AVX512VL: vpminuq
}
4877
; @test166 - unsigned 64-bit minimum over <4 x i64>, written as
; select(a <= b, a, b). The i32 pointer arguments are bitcast to <4 x i64>
; pointers and the result is stored back through the %a pointer.
; The AVX512VL check at the end of the function expects vpminuq.
define void @test166(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address of the current vector in each input.
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <4 x i64>*
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  %rhs = load <4 x i64>* %b.vec, align 2
  %mask = icmp ule <4 x i64> %lhs, %rhs
  %pick = select <4 x i1> %mask, <4 x i64> %lhs, <4 x i64> %rhs
  store <4 x i64> %pick, <4 x i64>* %a.vec, align 2
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test166:
; AVX512VL: vpminuq
}
4903
; @test167 - unsigned 64-bit maximum over <4 x i64>, written as
; select(a > b, a, b). The i32 pointer arguments are bitcast to <4 x i64>
; pointers and the result is stored back through the %a pointer.
; The AVX512VL check at the end of the function expects vpmaxuq.
define void @test167(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address of the current vector in each input.
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <4 x i64>*
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  %rhs = load <4 x i64>* %b.vec, align 2
  %mask = icmp ugt <4 x i64> %lhs, %rhs
  %pick = select <4 x i1> %mask, <4 x i64> %lhs, <4 x i64> %rhs
  store <4 x i64> %pick, <4 x i64>* %a.vec, align 2
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test167:
; AVX512VL: vpmaxuq
}
4929
; @test168 - unsigned 64-bit maximum over <4 x i64>, written as
; select(a >= b, a, b). The i32 pointer arguments are bitcast to <4 x i64>
; pointers and the result is stored back through the %a pointer.
; The AVX512VL check at the end of the function expects vpmaxuq.
define void @test168(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address of the current vector in each input.
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <4 x i64>*
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  %rhs = load <4 x i64>* %b.vec, align 2
  %mask = icmp uge <4 x i64> %lhs, %rhs
  %pick = select <4 x i1> %mask, <4 x i64> %lhs, <4 x i64> %rhs
  store <4 x i64> %pick, <4 x i64>* %a.vec, align 2
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test168:
; AVX512VL: vpmaxuq
}
4955
; @test169 - signed 64-bit maximum over <4 x i64>, written with swapped select
; operands as select(a < b, b, a). The i32 pointer arguments are bitcast to
; <4 x i64> pointers and the result is stored back through the %a pointer.
; The AVX512VL check at the end of the function expects vpmaxsq.
define void @test169(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address of the current vector in each input.
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <4 x i64>*
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  %rhs = load <4 x i64>* %b.vec, align 2
  %mask = icmp slt <4 x i64> %lhs, %rhs
  %pick = select <4 x i1> %mask, <4 x i64> %rhs, <4 x i64> %lhs
  store <4 x i64> %pick, <4 x i64>* %a.vec, align 2
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test169:
; AVX512VL: vpmaxsq
}
4981
; @test170 - signed 64-bit maximum over <4 x i64>, written with swapped select
; operands as select(a <= b, b, a). The i32 pointer arguments are bitcast to
; <4 x i64> pointers and the result is stored back through the %a pointer.
; The AVX512VL check at the end of the function expects vpmaxsq.
define void @test170(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address of the current vector in each input.
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <4 x i64>*
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  %rhs = load <4 x i64>* %b.vec, align 2
  %mask = icmp sle <4 x i64> %lhs, %rhs
  %pick = select <4 x i1> %mask, <4 x i64> %rhs, <4 x i64> %lhs
  store <4 x i64> %pick, <4 x i64>* %a.vec, align 2
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test170:
; AVX512VL: vpmaxsq
}
5007
; @test171 - signed 64-bit minimum over <4 x i64>, written with swapped select
; operands as select(a > b, b, a). The i32 pointer arguments are bitcast to
; <4 x i64> pointers and the result is stored back through the %a pointer.
; The AVX512VL check at the end of the function expects vpminsq.
define void @test171(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Address of the current vector in each input.
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <4 x i64>*
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  %rhs = load <4 x i64>* %b.vec, align 2
  %mask = icmp sgt <4 x i64> %lhs, %rhs
  %pick = select <4 x i1> %mask, <4 x i64> %rhs, <4 x i64> %lhs
  store <4 x i64> %pick, <4 x i64>* %a.vec, align 2
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test171:
; AVX512VL: vpminsq
}
5033
; test172: <4 x i64> signed minimum spelled as select(a sge b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test172(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <4 x i64>*
  %rhs = load <4 x i64>* %b.vec, align 2
  ; Per lane: %lhs >= %rhs (signed) keeps %rhs, otherwise %lhs.
  %mask = icmp sge <4 x i64> %lhs, %rhs
  %picked = select <4 x i1> %mask, <4 x i64> %rhs, <4 x i64> %lhs
  store <4 x i64> %picked, <4 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes, one <4 x i64>); leave when the index hits 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test172:
; AVX512VL: vpminsq
}
5059
; test173: <4 x i64> unsigned maximum spelled as select(a ult b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test173(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <4 x i64>*
  %rhs = load <4 x i64>* %b.vec, align 2
  ; Per lane: %lhs < %rhs (unsigned) keeps %rhs, otherwise %lhs.
  %mask = icmp ult <4 x i64> %lhs, %rhs
  %picked = select <4 x i1> %mask, <4 x i64> %rhs, <4 x i64> %lhs
  store <4 x i64> %picked, <4 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes, one <4 x i64>); leave when the index hits 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test173:
; AVX512VL: vpmaxuq
}
5085
; test174: <4 x i64> unsigned maximum spelled as select(a ule b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test174(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <4 x i64>*
  %rhs = load <4 x i64>* %b.vec, align 2
  ; Per lane: %lhs <= %rhs (unsigned) keeps %rhs, otherwise %lhs.
  %mask = icmp ule <4 x i64> %lhs, %rhs
  %picked = select <4 x i1> %mask, <4 x i64> %rhs, <4 x i64> %lhs
  store <4 x i64> %picked, <4 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes, one <4 x i64>); leave when the index hits 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test174:
; AVX512VL: vpmaxuq
}
5111
; test175: <4 x i64> unsigned minimum spelled as select(a ugt b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test175(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <4 x i64>*
  %rhs = load <4 x i64>* %b.vec, align 2
  ; Per lane: %lhs > %rhs (unsigned) keeps %rhs, otherwise %lhs.
  %mask = icmp ugt <4 x i64> %lhs, %rhs
  %picked = select <4 x i1> %mask, <4 x i64> %rhs, <4 x i64> %lhs
  store <4 x i64> %picked, <4 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes, one <4 x i64>); leave when the index hits 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test175:
; AVX512VL: vpminuq
}
5137
; test176: <4 x i64> unsigned minimum spelled as select(a uge b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test176(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <4 x i64>*
  %lhs = load <4 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <4 x i64>*
  %rhs = load <4 x i64>* %b.vec, align 2
  ; Per lane: %lhs >= %rhs (unsigned) keeps %rhs, otherwise %lhs.
  %mask = icmp uge <4 x i64> %lhs, %rhs
  %picked = select <4 x i1> %mask, <4 x i64> %rhs, <4 x i64> %lhs
  store <4 x i64> %picked, <4 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes, one <4 x i64>); leave when the index hits 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test176:
; AVX512VL: vpminuq
}
5163
; test177: <2 x i64> signed minimum spelled as select(a slt b, a, b).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test177(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs < %rhs (signed) keeps %lhs, otherwise %rhs.
  %mask = icmp slt <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %lhs, <2 x i64> %rhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test177:
; AVX512VL: vpminsq
}
5189
; test178: <2 x i64> signed minimum spelled as select(a sle b, a, b).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test178(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs <= %rhs (signed) keeps %lhs, otherwise %rhs.
  %mask = icmp sle <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %lhs, <2 x i64> %rhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test178:
; AVX512VL: vpminsq
}
5215
; test179: <2 x i64> signed maximum spelled as select(a sgt b, a, b).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test179(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs > %rhs (signed) keeps %lhs, otherwise %rhs.
  %mask = icmp sgt <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %lhs, <2 x i64> %rhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test179:
; AVX512VL: vpmaxsq
}
5241
; test180: <2 x i64> signed maximum spelled as select(a sge b, a, b).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test180(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs >= %rhs (signed) keeps %lhs, otherwise %rhs.
  %mask = icmp sge <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %lhs, <2 x i64> %rhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test180:
; AVX512VL: vpmaxsq
}
5267
; test181: <2 x i64> unsigned minimum spelled as select(a ult b, a, b).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test181(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs < %rhs (unsigned) keeps %lhs, otherwise %rhs.
  %mask = icmp ult <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %lhs, <2 x i64> %rhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test181:
; AVX512VL: vpminuq
}
5293
; test182: <2 x i64> unsigned minimum spelled as select(a ule b, a, b).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test182(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs <= %rhs (unsigned) keeps %lhs, otherwise %rhs.
  %mask = icmp ule <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %lhs, <2 x i64> %rhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test182:
; AVX512VL: vpminuq
}
5319
; test183: <2 x i64> unsigned maximum spelled as select(a ugt b, a, b).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test183(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs > %rhs (unsigned) keeps %lhs, otherwise %rhs.
  %mask = icmp ugt <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %lhs, <2 x i64> %rhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test183:
; AVX512VL: vpmaxuq
}
5345
; test184: <2 x i64> unsigned maximum spelled as select(a uge b, a, b).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test184(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs >= %rhs (unsigned) keeps %lhs, otherwise %rhs.
  %mask = icmp uge <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %lhs, <2 x i64> %rhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test184:
; AVX512VL: vpmaxuq
}
5371
; test185: <2 x i64> signed maximum spelled as select(a slt b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test185(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs < %rhs (signed) keeps %rhs, otherwise %lhs.
  %mask = icmp slt <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %rhs, <2 x i64> %lhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test185:
; AVX512VL: vpmaxsq
}
5397
; test186: <2 x i64> signed maximum spelled as select(a sle b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test186(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs <= %rhs (signed) keeps %rhs, otherwise %lhs.
  %mask = icmp sle <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %rhs, <2 x i64> %lhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test186:
; AVX512VL: vpmaxsq
}
5423
; test187: <2 x i64> signed minimum spelled as select(a sgt b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test187(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs > %rhs (signed) keeps %rhs, otherwise %lhs.
  %mask = icmp sgt <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %rhs, <2 x i64> %lhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test187:
; AVX512VL: vpminsq
}
5449
; test188: <2 x i64> signed minimum spelled as select(a sge b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test188(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs >= %rhs (signed) keeps %rhs, otherwise %lhs.
  %mask = icmp sge <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %rhs, <2 x i64> %lhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test188:
; AVX512VL: vpminsq
}
5475
; test189: <2 x i64> unsigned maximum spelled as select(a ult b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test189(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs < %rhs (unsigned) keeps %rhs, otherwise %lhs.
  %mask = icmp ult <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %rhs, <2 x i64> %lhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test189:
; AVX512VL: vpmaxuq
}
5501
; test190: <2 x i64> unsigned maximum spelled as select(a ule b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test190(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs <= %rhs (unsigned) keeps %rhs, otherwise %lhs.
  %mask = icmp ule <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %rhs, <2 x i64> %lhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test190:
; AVX512VL: vpmaxuq
}
5527
; test191: <2 x i64> unsigned minimum spelled as select(a ugt b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test191(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs > %rhs (unsigned) keeps %rhs, otherwise %lhs.
  %mask = icmp ugt <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %rhs, <2 x i64> %lhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test191:
; AVX512VL: vpminuq
}
5553
; test192: <2 x i64> unsigned minimum spelled as select(a uge b, b, a).
; Walks %a and %b with one shared index and writes each result back over the %a slot.
define void @test192(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Left operand: vector read through %a at i32 index %i.
  %a.addr = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.addr to <2 x i64>*
  %lhs = load <2 x i64>* %a.vec, align 2
  ; Right operand: vector read through %b at the same index.
  %b.addr = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.addr to <2 x i64>*
  %rhs = load <2 x i64>* %b.vec, align 2
  ; Per lane: %lhs >= %rhs (unsigned) keeps %rhs, otherwise %lhs.
  %mask = icmp uge <2 x i64> %lhs, %rhs
  %picked = select <2 x i1> %mask, <2 x i64> %rhs, <2 x i64> %lhs
  store <2 x i64> %picked, <2 x i64>* %a.vec, align 2
  ; Step 8 i32 slots (32 bytes; the <2 x i64> access spans 16); leave at index 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX512VL-LABEL: test192:
; AVX512VL: vpminuq
}
5579