1; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
2; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
3; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
4; RUN: llc -march=x86-64 -mcpu=core-avx2 -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2
5
; test1: lane-wise signed minimum of two <16 x i8> vectors, written as
; select(icmp slt a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminsb (vpminsb in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
6define void @test1(i8* nocapture %a, i8* nocapture %b) nounwind {
7vector.ph:
8  br label %vector.body
9
10vector.body:                                      ; preds = %vector.body, %vector.ph
11  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
12  %gep.a = getelementptr inbounds i8* %a, i64 %index
13  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <16 x i8> vectors.
14  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
15  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
16  %load.a = load <16 x i8>* %ptr.a, align 2
17  %load.b = load <16 x i8>* %ptr.b, align 2
; Per lane: a < b (signed) ? a : b.
18  %cmp = icmp slt <16 x i8> %load.a, %load.b
19  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
20  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once the index reaches 16384.
21  %index.next = add i64 %index, 16
22  %loop = icmp eq i64 %index.next, 16384
23  br i1 %loop, label %for.end, label %vector.body
24
25for.end:                                          ; preds = %vector.body
26  ret void
27
28; SSE4-LABEL: test1:
29; SSE4: pminsb
30
31; AVX1-LABEL: test1:
32; AVX1: vpminsb
33
34; AVX2-LABEL: test1:
35; AVX2: vpminsb
36}
37
; test2: lane-wise signed minimum of two <16 x i8> vectors, written as
; select(icmp sle a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminsb (vpminsb in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
38define void @test2(i8* nocapture %a, i8* nocapture %b) nounwind {
39vector.ph:
40  br label %vector.body
41
42vector.body:                                      ; preds = %vector.body, %vector.ph
43  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
44  %gep.a = getelementptr inbounds i8* %a, i64 %index
45  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <16 x i8> vectors.
46  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
47  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
48  %load.a = load <16 x i8>* %ptr.a, align 2
49  %load.b = load <16 x i8>* %ptr.b, align 2
; Per lane: a <= b (signed) ? a : b.
50  %cmp = icmp sle <16 x i8> %load.a, %load.b
51  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
52  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once the index reaches 16384.
53  %index.next = add i64 %index, 16
54  %loop = icmp eq i64 %index.next, 16384
55  br i1 %loop, label %for.end, label %vector.body
56
57for.end:                                          ; preds = %vector.body
58  ret void
59
60; SSE4-LABEL: test2:
61; SSE4: pminsb
62
63; AVX1-LABEL: test2:
64; AVX1: vpminsb
65
66; AVX2-LABEL: test2:
67; AVX2: vpminsb
68}
69
; test3: lane-wise signed maximum of two <16 x i8> vectors, written as
; select(icmp sgt a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxsb (vpmaxsb in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
70define void @test3(i8* nocapture %a, i8* nocapture %b) nounwind {
71vector.ph:
72  br label %vector.body
73
74vector.body:                                      ; preds = %vector.body, %vector.ph
75  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
76  %gep.a = getelementptr inbounds i8* %a, i64 %index
77  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <16 x i8> vectors.
78  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
79  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
80  %load.a = load <16 x i8>* %ptr.a, align 2
81  %load.b = load <16 x i8>* %ptr.b, align 2
; Per lane: a > b (signed) ? a : b.
82  %cmp = icmp sgt <16 x i8> %load.a, %load.b
83  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
84  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once the index reaches 16384.
85  %index.next = add i64 %index, 16
86  %loop = icmp eq i64 %index.next, 16384
87  br i1 %loop, label %for.end, label %vector.body
88
89for.end:                                          ; preds = %vector.body
90  ret void
91
92; SSE4-LABEL: test3:
93; SSE4: pmaxsb
94
95; AVX1-LABEL: test3:
96; AVX1: vpmaxsb
97
98; AVX2-LABEL: test3:
99; AVX2: vpmaxsb
100}
101
; test4: lane-wise signed maximum of two <16 x i8> vectors, written as
; select(icmp sge a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxsb (vpmaxsb in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
102define void @test4(i8* nocapture %a, i8* nocapture %b) nounwind {
103vector.ph:
104  br label %vector.body
105
106vector.body:                                      ; preds = %vector.body, %vector.ph
107  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
108  %gep.a = getelementptr inbounds i8* %a, i64 %index
109  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <16 x i8> vectors.
110  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
111  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
112  %load.a = load <16 x i8>* %ptr.a, align 2
113  %load.b = load <16 x i8>* %ptr.b, align 2
; Per lane: a >= b (signed) ? a : b.
114  %cmp = icmp sge <16 x i8> %load.a, %load.b
115  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
116  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once the index reaches 16384.
117  %index.next = add i64 %index, 16
118  %loop = icmp eq i64 %index.next, 16384
119  br i1 %loop, label %for.end, label %vector.body
120
121for.end:                                          ; preds = %vector.body
122  ret void
123
124; SSE4-LABEL: test4:
125; SSE4: pmaxsb
126
127; AVX1-LABEL: test4:
128; AVX1: vpmaxsb
129
130; AVX2-LABEL: test4:
131; AVX2: vpmaxsb
132}
133
; test5: lane-wise unsigned minimum of two <16 x i8> vectors, written as
; select(icmp ult a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminub (vpminub in the AVX runs) for the core2,
; corei7-avx and core-avx2 runs.
134define void @test5(i8* nocapture %a, i8* nocapture %b) nounwind {
135vector.ph:
136  br label %vector.body
137
138vector.body:                                      ; preds = %vector.body, %vector.ph
139  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
140  %gep.a = getelementptr inbounds i8* %a, i64 %index
141  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <16 x i8> vectors.
142  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
143  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
144  %load.a = load <16 x i8>* %ptr.a, align 2
145  %load.b = load <16 x i8>* %ptr.b, align 2
; Per lane: a < b (unsigned) ? a : b.
146  %cmp = icmp ult <16 x i8> %load.a, %load.b
147  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
148  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once the index reaches 16384.
149  %index.next = add i64 %index, 16
150  %loop = icmp eq i64 %index.next, 16384
151  br i1 %loop, label %for.end, label %vector.body
152
153for.end:                                          ; preds = %vector.body
154  ret void
155
156; SSE2-LABEL: test5:
157; SSE2: pminub
158
159; AVX1-LABEL: test5:
160; AVX1: vpminub
161
162; AVX2-LABEL: test5:
163; AVX2: vpminub
164}
165
; test6: lane-wise unsigned minimum of two <16 x i8> vectors, written as
; select(icmp ule a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminub (vpminub in the AVX runs) for the core2,
; corei7-avx and core-avx2 runs.
166define void @test6(i8* nocapture %a, i8* nocapture %b) nounwind {
167vector.ph:
168  br label %vector.body
169
170vector.body:                                      ; preds = %vector.body, %vector.ph
171  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
172  %gep.a = getelementptr inbounds i8* %a, i64 %index
173  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <16 x i8> vectors.
174  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
175  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
176  %load.a = load <16 x i8>* %ptr.a, align 2
177  %load.b = load <16 x i8>* %ptr.b, align 2
; Per lane: a <= b (unsigned) ? a : b.
178  %cmp = icmp ule <16 x i8> %load.a, %load.b
179  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
180  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once the index reaches 16384.
181  %index.next = add i64 %index, 16
182  %loop = icmp eq i64 %index.next, 16384
183  br i1 %loop, label %for.end, label %vector.body
184
185for.end:                                          ; preds = %vector.body
186  ret void
187
188; SSE2-LABEL: test6:
189; SSE2: pminub
190
191; AVX1-LABEL: test6:
192; AVX1: vpminub
193
194; AVX2-LABEL: test6:
195; AVX2: vpminub
196}
197
; test7: lane-wise unsigned maximum of two <16 x i8> vectors, written as
; select(icmp ugt a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxub (vpmaxub in the AVX runs) for the core2,
; corei7-avx and core-avx2 runs.
198define void @test7(i8* nocapture %a, i8* nocapture %b) nounwind {
199vector.ph:
200  br label %vector.body
201
202vector.body:                                      ; preds = %vector.body, %vector.ph
203  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
204  %gep.a = getelementptr inbounds i8* %a, i64 %index
205  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <16 x i8> vectors.
206  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
207  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
208  %load.a = load <16 x i8>* %ptr.a, align 2
209  %load.b = load <16 x i8>* %ptr.b, align 2
; Per lane: a > b (unsigned) ? a : b.
210  %cmp = icmp ugt <16 x i8> %load.a, %load.b
211  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
212  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once the index reaches 16384.
213  %index.next = add i64 %index, 16
214  %loop = icmp eq i64 %index.next, 16384
215  br i1 %loop, label %for.end, label %vector.body
216
217for.end:                                          ; preds = %vector.body
218  ret void
219
220; SSE2-LABEL: test7:
221; SSE2: pmaxub
222
223; AVX1-LABEL: test7:
224; AVX1: vpmaxub
225
226; AVX2-LABEL: test7:
227; AVX2: vpmaxub
228}
229
; test8: lane-wise unsigned maximum of two <16 x i8> vectors, written as
; select(icmp uge a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxub (vpmaxub in the AVX runs) for the core2,
; corei7-avx and core-avx2 runs.
230define void @test8(i8* nocapture %a, i8* nocapture %b) nounwind {
231vector.ph:
232  br label %vector.body
233
234vector.body:                                      ; preds = %vector.body, %vector.ph
235  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
236  %gep.a = getelementptr inbounds i8* %a, i64 %index
237  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <16 x i8> vectors.
238  %ptr.a = bitcast i8* %gep.a to <16 x i8>*
239  %ptr.b = bitcast i8* %gep.b to <16 x i8>*
240  %load.a = load <16 x i8>* %ptr.a, align 2
241  %load.b = load <16 x i8>* %ptr.b, align 2
; Per lane: a >= b (unsigned) ? a : b.
242  %cmp = icmp uge <16 x i8> %load.a, %load.b
243  %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
244  store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
; Step by one vector (16 elements); exit once the index reaches 16384.
245  %index.next = add i64 %index, 16
246  %loop = icmp eq i64 %index.next, 16384
247  br i1 %loop, label %for.end, label %vector.body
248
249for.end:                                          ; preds = %vector.body
250  ret void
251
252; SSE2-LABEL: test8:
253; SSE2: pmaxub
254
255; AVX1-LABEL: test8:
256; AVX1: vpmaxub
257
258; AVX2-LABEL: test8:
259; AVX2: vpmaxub
260}
261
; test9: lane-wise signed minimum of two <8 x i16> vectors, written as
; select(icmp slt a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminsw (vpminsw in the AVX runs) for the core2,
; corei7-avx and core-avx2 runs.
262define void @test9(i16* nocapture %a, i16* nocapture %b) nounwind {
263vector.ph:
264  br label %vector.body
265
266vector.body:                                      ; preds = %vector.body, %vector.ph
267  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
268  %gep.a = getelementptr inbounds i16* %a, i64 %index
269  %gep.b = getelementptr inbounds i16* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <8 x i16> vectors.
270  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
271  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
272  %load.a = load <8 x i16>* %ptr.a, align 2
273  %load.b = load <8 x i16>* %ptr.b, align 2
; Per lane: a < b (signed) ? a : b.
274  %cmp = icmp slt <8 x i16> %load.a, %load.b
275  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
276  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once the index reaches 16384.
277  %index.next = add i64 %index, 8
278  %loop = icmp eq i64 %index.next, 16384
279  br i1 %loop, label %for.end, label %vector.body
280
281for.end:                                          ; preds = %vector.body
282  ret void
283
284; SSE2-LABEL: test9:
285; SSE2: pminsw
286
287; AVX1-LABEL: test9:
288; AVX1: vpminsw
289
290; AVX2-LABEL: test9:
291; AVX2: vpminsw
292}
293
; test10: lane-wise signed minimum of two <8 x i16> vectors, written as
; select(icmp sle a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminsw (vpminsw in the AVX runs) for the core2,
; corei7-avx and core-avx2 runs.
294define void @test10(i16* nocapture %a, i16* nocapture %b) nounwind {
295vector.ph:
296  br label %vector.body
297
298vector.body:                                      ; preds = %vector.body, %vector.ph
299  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
300  %gep.a = getelementptr inbounds i16* %a, i64 %index
301  %gep.b = getelementptr inbounds i16* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <8 x i16> vectors.
302  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
303  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
304  %load.a = load <8 x i16>* %ptr.a, align 2
305  %load.b = load <8 x i16>* %ptr.b, align 2
; Per lane: a <= b (signed) ? a : b.
306  %cmp = icmp sle <8 x i16> %load.a, %load.b
307  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
308  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once the index reaches 16384.
309  %index.next = add i64 %index, 8
310  %loop = icmp eq i64 %index.next, 16384
311  br i1 %loop, label %for.end, label %vector.body
312
313for.end:                                          ; preds = %vector.body
314  ret void
315
316; SSE2-LABEL: test10:
317; SSE2: pminsw
318
319; AVX1-LABEL: test10:
320; AVX1: vpminsw
321
322; AVX2-LABEL: test10:
323; AVX2: vpminsw
324}
325
; test11: lane-wise signed maximum of two <8 x i16> vectors, written as
; select(icmp sgt a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxsw (vpmaxsw in the AVX runs) for the core2,
; corei7-avx and core-avx2 runs.
326define void @test11(i16* nocapture %a, i16* nocapture %b) nounwind {
327vector.ph:
328  br label %vector.body
329
330vector.body:                                      ; preds = %vector.body, %vector.ph
331  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
332  %gep.a = getelementptr inbounds i16* %a, i64 %index
333  %gep.b = getelementptr inbounds i16* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <8 x i16> vectors.
334  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
335  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
336  %load.a = load <8 x i16>* %ptr.a, align 2
337  %load.b = load <8 x i16>* %ptr.b, align 2
; Per lane: a > b (signed) ? a : b.
338  %cmp = icmp sgt <8 x i16> %load.a, %load.b
339  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
340  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once the index reaches 16384.
341  %index.next = add i64 %index, 8
342  %loop = icmp eq i64 %index.next, 16384
343  br i1 %loop, label %for.end, label %vector.body
344
345for.end:                                          ; preds = %vector.body
346  ret void
347
348; SSE2-LABEL: test11:
349; SSE2: pmaxsw
350
351; AVX1-LABEL: test11:
352; AVX1: vpmaxsw
353
354; AVX2-LABEL: test11:
355; AVX2: vpmaxsw
356}
357
; test12: lane-wise signed maximum of two <8 x i16> vectors, written as
; select(icmp sge a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxsw (vpmaxsw in the AVX runs) for the core2,
; corei7-avx and core-avx2 runs.
358define void @test12(i16* nocapture %a, i16* nocapture %b) nounwind {
359vector.ph:
360  br label %vector.body
361
362vector.body:                                      ; preds = %vector.body, %vector.ph
363  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
364  %gep.a = getelementptr inbounds i16* %a, i64 %index
365  %gep.b = getelementptr inbounds i16* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <8 x i16> vectors.
366  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
367  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
368  %load.a = load <8 x i16>* %ptr.a, align 2
369  %load.b = load <8 x i16>* %ptr.b, align 2
; Per lane: a >= b (signed) ? a : b.
370  %cmp = icmp sge <8 x i16> %load.a, %load.b
371  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
372  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once the index reaches 16384.
373  %index.next = add i64 %index, 8
374  %loop = icmp eq i64 %index.next, 16384
375  br i1 %loop, label %for.end, label %vector.body
376
377for.end:                                          ; preds = %vector.body
378  ret void
379
380; SSE2-LABEL: test12:
381; SSE2: pmaxsw
382
383; AVX1-LABEL: test12:
384; AVX1: vpmaxsw
385
386; AVX2-LABEL: test12:
387; AVX2: vpmaxsw
388}
389
; test13: lane-wise unsigned minimum of two <8 x i16> vectors, written as
; select(icmp ult a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminuw (vpminuw in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
390define void @test13(i16* nocapture %a, i16* nocapture %b) nounwind {
391vector.ph:
392  br label %vector.body
393
394vector.body:                                      ; preds = %vector.body, %vector.ph
395  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
396  %gep.a = getelementptr inbounds i16* %a, i64 %index
397  %gep.b = getelementptr inbounds i16* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <8 x i16> vectors.
398  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
399  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
400  %load.a = load <8 x i16>* %ptr.a, align 2
401  %load.b = load <8 x i16>* %ptr.b, align 2
; Per lane: a < b (unsigned) ? a : b.
402  %cmp = icmp ult <8 x i16> %load.a, %load.b
403  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
404  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once the index reaches 16384.
405  %index.next = add i64 %index, 8
406  %loop = icmp eq i64 %index.next, 16384
407  br i1 %loop, label %for.end, label %vector.body
408
409for.end:                                          ; preds = %vector.body
410  ret void
411
412; SSE4-LABEL: test13:
413; SSE4: pminuw
414
415; AVX1-LABEL: test13:
416; AVX1: vpminuw
417
418; AVX2-LABEL: test13:
419; AVX2: vpminuw
420}
421
; test14: lane-wise unsigned minimum of two <8 x i16> vectors, written as
; select(icmp ule a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminuw (vpminuw in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
422define void @test14(i16* nocapture %a, i16* nocapture %b) nounwind {
423vector.ph:
424  br label %vector.body
425
426vector.body:                                      ; preds = %vector.body, %vector.ph
427  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
428  %gep.a = getelementptr inbounds i16* %a, i64 %index
429  %gep.b = getelementptr inbounds i16* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <8 x i16> vectors.
430  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
431  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
432  %load.a = load <8 x i16>* %ptr.a, align 2
433  %load.b = load <8 x i16>* %ptr.b, align 2
; Per lane: a <= b (unsigned) ? a : b.
434  %cmp = icmp ule <8 x i16> %load.a, %load.b
435  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
436  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once the index reaches 16384.
437  %index.next = add i64 %index, 8
438  %loop = icmp eq i64 %index.next, 16384
439  br i1 %loop, label %for.end, label %vector.body
440
441for.end:                                          ; preds = %vector.body
442  ret void
443
444; SSE4-LABEL: test14:
445; SSE4: pminuw
446
447; AVX1-LABEL: test14:
448; AVX1: vpminuw
449
450; AVX2-LABEL: test14:
451; AVX2: vpminuw
452}
453
; test15: lane-wise unsigned maximum of two <8 x i16> vectors, written as
; select(icmp ugt a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxuw (vpmaxuw in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
454define void @test15(i16* nocapture %a, i16* nocapture %b) nounwind {
455vector.ph:
456  br label %vector.body
457
458vector.body:                                      ; preds = %vector.body, %vector.ph
459  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
460  %gep.a = getelementptr inbounds i16* %a, i64 %index
461  %gep.b = getelementptr inbounds i16* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <8 x i16> vectors.
462  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
463  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
464  %load.a = load <8 x i16>* %ptr.a, align 2
465  %load.b = load <8 x i16>* %ptr.b, align 2
; Per lane: a > b (unsigned) ? a : b.
466  %cmp = icmp ugt <8 x i16> %load.a, %load.b
467  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
468  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once the index reaches 16384.
469  %index.next = add i64 %index, 8
470  %loop = icmp eq i64 %index.next, 16384
471  br i1 %loop, label %for.end, label %vector.body
472
473for.end:                                          ; preds = %vector.body
474  ret void
475
476; SSE4-LABEL: test15:
477; SSE4: pmaxuw
478
479; AVX1-LABEL: test15:
480; AVX1: vpmaxuw
481
482; AVX2-LABEL: test15:
483; AVX2: vpmaxuw
484}
485
; test16: lane-wise unsigned maximum of two <8 x i16> vectors, written as
; select(icmp uge a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxuw (vpmaxuw in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
486define void @test16(i16* nocapture %a, i16* nocapture %b) nounwind {
487vector.ph:
488  br label %vector.body
489
490vector.body:                                      ; preds = %vector.body, %vector.ph
491  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
492  %gep.a = getelementptr inbounds i16* %a, i64 %index
493  %gep.b = getelementptr inbounds i16* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <8 x i16> vectors.
494  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
495  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
496  %load.a = load <8 x i16>* %ptr.a, align 2
497  %load.b = load <8 x i16>* %ptr.b, align 2
; Per lane: a >= b (unsigned) ? a : b.
498  %cmp = icmp uge <8 x i16> %load.a, %load.b
499  %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
500  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
; Step by one vector (8 elements); exit once the index reaches 16384.
501  %index.next = add i64 %index, 8
502  %loop = icmp eq i64 %index.next, 16384
503  br i1 %loop, label %for.end, label %vector.body
504
505for.end:                                          ; preds = %vector.body
506  ret void
507
508; SSE4-LABEL: test16:
509; SSE4: pmaxuw
510
511; AVX1-LABEL: test16:
512; AVX1: vpmaxuw
513
514; AVX2-LABEL: test16:
515; AVX2: vpmaxuw
516}
517
; test17: lane-wise signed minimum of two <4 x i32> vectors, written as
; select(icmp slt a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminsd (vpminsd in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
518define void @test17(i32* nocapture %a, i32* nocapture %b) nounwind {
519vector.ph:
520  br label %vector.body
521
522vector.body:                                      ; preds = %vector.body, %vector.ph
523  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
524  %gep.a = getelementptr inbounds i32* %a, i64 %index
525  %gep.b = getelementptr inbounds i32* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <4 x i32> vectors.
526  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
527  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
528  %load.a = load <4 x i32>* %ptr.a, align 2
529  %load.b = load <4 x i32>* %ptr.b, align 2
; Per lane: a < b (signed) ? a : b.
530  %cmp = icmp slt <4 x i32> %load.a, %load.b
531  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
532  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once the index reaches 16384.
533  %index.next = add i64 %index, 4
534  %loop = icmp eq i64 %index.next, 16384
535  br i1 %loop, label %for.end, label %vector.body
536
537for.end:                                          ; preds = %vector.body
538  ret void
539
540; SSE4-LABEL: test17:
541; SSE4: pminsd
542
543; AVX1-LABEL: test17:
544; AVX1: vpminsd
545
546; AVX2-LABEL: test17:
547; AVX2: vpminsd
548}
549
; test18: lane-wise signed minimum of two <4 x i32> vectors, written as
; select(icmp sle a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminsd (vpminsd in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
550define void @test18(i32* nocapture %a, i32* nocapture %b) nounwind {
551vector.ph:
552  br label %vector.body
553
554vector.body:                                      ; preds = %vector.body, %vector.ph
555  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
556  %gep.a = getelementptr inbounds i32* %a, i64 %index
557  %gep.b = getelementptr inbounds i32* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <4 x i32> vectors.
558  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
559  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
560  %load.a = load <4 x i32>* %ptr.a, align 2
561  %load.b = load <4 x i32>* %ptr.b, align 2
; Per lane: a <= b (signed) ? a : b.
562  %cmp = icmp sle <4 x i32> %load.a, %load.b
563  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
564  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once the index reaches 16384.
565  %index.next = add i64 %index, 4
566  %loop = icmp eq i64 %index.next, 16384
567  br i1 %loop, label %for.end, label %vector.body
568
569for.end:                                          ; preds = %vector.body
570  ret void
571
572; SSE4-LABEL: test18:
573; SSE4: pminsd
574
575; AVX1-LABEL: test18:
576; AVX1: vpminsd
577
578; AVX2-LABEL: test18:
579; AVX2: vpminsd
580}
581
; test19: lane-wise signed maximum of two <4 x i32> vectors, written as
; select(icmp sgt a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxsd (vpmaxsd in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
582define void @test19(i32* nocapture %a, i32* nocapture %b) nounwind {
583vector.ph:
584  br label %vector.body
585
586vector.body:                                      ; preds = %vector.body, %vector.ph
587  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
588  %gep.a = getelementptr inbounds i32* %a, i64 %index
589  %gep.b = getelementptr inbounds i32* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <4 x i32> vectors.
590  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
591  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
592  %load.a = load <4 x i32>* %ptr.a, align 2
593  %load.b = load <4 x i32>* %ptr.b, align 2
; Per lane: a > b (signed) ? a : b.
594  %cmp = icmp sgt <4 x i32> %load.a, %load.b
595  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
596  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once the index reaches 16384.
597  %index.next = add i64 %index, 4
598  %loop = icmp eq i64 %index.next, 16384
599  br i1 %loop, label %for.end, label %vector.body
600
601for.end:                                          ; preds = %vector.body
602  ret void
603
604; SSE4-LABEL: test19:
605; SSE4: pmaxsd
606
607; AVX1-LABEL: test19:
608; AVX1: vpmaxsd
609
610; AVX2-LABEL: test19:
611; AVX2: vpmaxsd
612}
613
; test20: lane-wise signed maximum of two <4 x i32> vectors, written as
; select(icmp sge a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxsd (vpmaxsd in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
614define void @test20(i32* nocapture %a, i32* nocapture %b) nounwind {
615vector.ph:
616  br label %vector.body
617
618vector.body:                                      ; preds = %vector.body, %vector.ph
619  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
620  %gep.a = getelementptr inbounds i32* %a, i64 %index
621  %gep.b = getelementptr inbounds i32* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <4 x i32> vectors.
622  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
623  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
624  %load.a = load <4 x i32>* %ptr.a, align 2
625  %load.b = load <4 x i32>* %ptr.b, align 2
; Per lane: a >= b (signed) ? a : b.
626  %cmp = icmp sge <4 x i32> %load.a, %load.b
627  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
628  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once the index reaches 16384.
629  %index.next = add i64 %index, 4
630  %loop = icmp eq i64 %index.next, 16384
631  br i1 %loop, label %for.end, label %vector.body
632
633for.end:                                          ; preds = %vector.body
634  ret void
635
636; SSE4-LABEL: test20:
637; SSE4: pmaxsd
638
639; AVX1-LABEL: test20:
640; AVX1: vpmaxsd
641
642; AVX2-LABEL: test20:
643; AVX2: vpmaxsd
644}
645
; test21: lane-wise unsigned minimum of two <4 x i32> vectors, written as
; select(icmp ult a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminud (vpminud in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
646define void @test21(i32* nocapture %a, i32* nocapture %b) nounwind {
647vector.ph:
648  br label %vector.body
649
650vector.body:                                      ; preds = %vector.body, %vector.ph
651  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
652  %gep.a = getelementptr inbounds i32* %a, i64 %index
653  %gep.b = getelementptr inbounds i32* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <4 x i32> vectors.
654  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
655  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
656  %load.a = load <4 x i32>* %ptr.a, align 2
657  %load.b = load <4 x i32>* %ptr.b, align 2
; Per lane: a < b (unsigned) ? a : b.
658  %cmp = icmp ult <4 x i32> %load.a, %load.b
659  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
660  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once the index reaches 16384.
661  %index.next = add i64 %index, 4
662  %loop = icmp eq i64 %index.next, 16384
663  br i1 %loop, label %for.end, label %vector.body
664
665for.end:                                          ; preds = %vector.body
666  ret void
667
668; SSE4-LABEL: test21:
669; SSE4: pminud
670
671; AVX1-LABEL: test21:
672; AVX1: vpminud
673
674; AVX2-LABEL: test21:
675; AVX2: vpminud
676}
677
; test22: lane-wise unsigned minimum of two <4 x i32> vectors, written as
; select(icmp ule a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pminud (vpminud in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
678define void @test22(i32* nocapture %a, i32* nocapture %b) nounwind {
679vector.ph:
680  br label %vector.body
681
682vector.body:                                      ; preds = %vector.body, %vector.ph
683  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
684  %gep.a = getelementptr inbounds i32* %a, i64 %index
685  %gep.b = getelementptr inbounds i32* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <4 x i32> vectors.
686  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
687  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
688  %load.a = load <4 x i32>* %ptr.a, align 2
689  %load.b = load <4 x i32>* %ptr.b, align 2
; Per lane: a <= b (unsigned) ? a : b.
690  %cmp = icmp ule <4 x i32> %load.a, %load.b
691  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
692  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once the index reaches 16384.
693  %index.next = add i64 %index, 4
694  %loop = icmp eq i64 %index.next, 16384
695  br i1 %loop, label %for.end, label %vector.body
696
697for.end:                                          ; preds = %vector.body
698  ret void
699
700; SSE4-LABEL: test22:
701; SSE4: pminud
702
703; AVX1-LABEL: test22:
704; AVX1: vpminud
705
706; AVX2-LABEL: test22:
707; AVX2: vpminud
708}
709
; test23: lane-wise unsigned maximum of two <4 x i32> vectors, written as
; select(icmp ugt a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxud (vpmaxud in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
710define void @test23(i32* nocapture %a, i32* nocapture %b) nounwind {
711vector.ph:
712  br label %vector.body
713
714vector.body:                                      ; preds = %vector.body, %vector.ph
715  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
716  %gep.a = getelementptr inbounds i32* %a, i64 %index
717  %gep.b = getelementptr inbounds i32* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <4 x i32> vectors.
718  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
719  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
720  %load.a = load <4 x i32>* %ptr.a, align 2
721  %load.b = load <4 x i32>* %ptr.b, align 2
; Per lane: a > b (unsigned) ? a : b.
722  %cmp = icmp ugt <4 x i32> %load.a, %load.b
723  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
724  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once the index reaches 16384.
725  %index.next = add i64 %index, 4
726  %loop = icmp eq i64 %index.next, 16384
727  br i1 %loop, label %for.end, label %vector.body
728
729for.end:                                          ; preds = %vector.body
730  ret void
731
732; SSE4-LABEL: test23:
733; SSE4: pmaxud
734
735; AVX1-LABEL: test23:
736; AVX1: vpmaxud
737
738; AVX2-LABEL: test23:
739; AVX2: vpmaxud
740}
741
; test24: lane-wise unsigned maximum of two <4 x i32> vectors, written as
; select(icmp uge a, b), a, b; the result is stored back through %ptr.a.
; The checks below expect pmaxud (vpmaxud in the AVX runs) for the corei7,
; corei7-avx and core-avx2 runs.
742define void @test24(i32* nocapture %a, i32* nocapture %b) nounwind {
743vector.ph:
744  br label %vector.body
745
746vector.body:                                      ; preds = %vector.body, %vector.ph
747  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
748  %gep.a = getelementptr inbounds i32* %a, i64 %index
749  %gep.b = getelementptr inbounds i32* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <4 x i32> vectors.
750  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
751  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
752  %load.a = load <4 x i32>* %ptr.a, align 2
753  %load.b = load <4 x i32>* %ptr.b, align 2
; Per lane: a >= b (unsigned) ? a : b.
754  %cmp = icmp uge <4 x i32> %load.a, %load.b
755  %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
756  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
; Step by one vector (4 elements); exit once the index reaches 16384.
757  %index.next = add i64 %index, 4
758  %loop = icmp eq i64 %index.next, 16384
759  br i1 %loop, label %for.end, label %vector.body
760
761for.end:                                          ; preds = %vector.body
762  ret void
763
764; SSE4-LABEL: test24:
765; SSE4: pmaxud
766
767; AVX1-LABEL: test24:
768; AVX1: vpmaxud
769
770; AVX2-LABEL: test24:
771; AVX2: vpmaxud
772}
773
; test25: lane-wise signed minimum of two <32 x i8> vectors, written as
; select(icmp slt a, b), a, b; the result is stored back through %ptr.a.
; The check below expects vpminsb and is stated only for the core-avx2 run.
774define void @test25(i8* nocapture %a, i8* nocapture %b) nounwind {
775vector.ph:
776  br label %vector.body
777
778vector.body:                                      ; preds = %vector.body, %vector.ph
779  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
780  %gep.a = getelementptr inbounds i8* %a, i64 %index
781  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <32 x i8> vectors.
782  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
783  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
784  %load.a = load <32 x i8>* %ptr.a, align 2
785  %load.b = load <32 x i8>* %ptr.b, align 2
; Per lane: a < b (signed) ? a : b.
786  %cmp = icmp slt <32 x i8> %load.a, %load.b
787  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
788  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once the index reaches 16384.
789  %index.next = add i64 %index, 32
790  %loop = icmp eq i64 %index.next, 16384
791  br i1 %loop, label %for.end, label %vector.body
792
793for.end:                                          ; preds = %vector.body
794  ret void
795
796; AVX2-LABEL: test25:
797; AVX2: vpminsb
798}
799
; test26: lane-wise signed minimum of two <32 x i8> vectors, written as
; select(icmp sle a, b), a, b; the result is stored back through %ptr.a.
; The check below expects vpminsb and is stated only for the core-avx2 run.
800define void @test26(i8* nocapture %a, i8* nocapture %b) nounwind {
801vector.ph:
802  br label %vector.body
803
804vector.body:                                      ; preds = %vector.body, %vector.ph
805  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
806  %gep.a = getelementptr inbounds i8* %a, i64 %index
807  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <32 x i8> vectors.
808  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
809  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
810  %load.a = load <32 x i8>* %ptr.a, align 2
811  %load.b = load <32 x i8>* %ptr.b, align 2
; Per lane: a <= b (signed) ? a : b.
812  %cmp = icmp sle <32 x i8> %load.a, %load.b
813  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
814  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once the index reaches 16384.
815  %index.next = add i64 %index, 32
816  %loop = icmp eq i64 %index.next, 16384
817  br i1 %loop, label %for.end, label %vector.body
818
819for.end:                                          ; preds = %vector.body
820  ret void
821
822; AVX2-LABEL: test26:
823; AVX2: vpminsb
824}
825
; test27: lane-wise signed maximum of two <32 x i8> vectors, written as
; select(icmp sgt a, b), a, b; the result is stored back through %ptr.a.
; The check below expects vpmaxsb and is stated only for the core-avx2 run.
826define void @test27(i8* nocapture %a, i8* nocapture %b) nounwind {
827vector.ph:
828  br label %vector.body
829
830vector.body:                                      ; preds = %vector.body, %vector.ph
831  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
832  %gep.a = getelementptr inbounds i8* %a, i64 %index
833  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <32 x i8> vectors.
834  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
835  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
836  %load.a = load <32 x i8>* %ptr.a, align 2
837  %load.b = load <32 x i8>* %ptr.b, align 2
; Per lane: a > b (signed) ? a : b.
838  %cmp = icmp sgt <32 x i8> %load.a, %load.b
839  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
840  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once the index reaches 16384.
841  %index.next = add i64 %index, 32
842  %loop = icmp eq i64 %index.next, 16384
843  br i1 %loop, label %for.end, label %vector.body
844
845for.end:                                          ; preds = %vector.body
846  ret void
847
848; AVX2-LABEL: test27:
849; AVX2: vpmaxsb
850}
851
; test28: lane-wise signed maximum of two <32 x i8> vectors, written as
; select(icmp sge a, b), a, b; the result is stored back through %ptr.a.
; The check below expects vpmaxsb and is stated only for the core-avx2 run.
852define void @test28(i8* nocapture %a, i8* nocapture %b) nounwind {
853vector.ph:
854  br label %vector.body
855
856vector.body:                                      ; preds = %vector.body, %vector.ph
857  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
858  %gep.a = getelementptr inbounds i8* %a, i64 %index
859  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <32 x i8> vectors.
860  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
861  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
862  %load.a = load <32 x i8>* %ptr.a, align 2
863  %load.b = load <32 x i8>* %ptr.b, align 2
; Per lane: a >= b (signed) ? a : b.
864  %cmp = icmp sge <32 x i8> %load.a, %load.b
865  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
866  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once the index reaches 16384.
867  %index.next = add i64 %index, 32
868  %loop = icmp eq i64 %index.next, 16384
869  br i1 %loop, label %for.end, label %vector.body
870
871for.end:                                          ; preds = %vector.body
872  ret void
873
874; AVX2-LABEL: test28:
875; AVX2: vpmaxsb
876}
877
; test29: lane-wise unsigned minimum of two <32 x i8> vectors, written as
; select(icmp ult a, b), a, b; the result is stored back through %ptr.a.
; The check below expects vpminub and is stated only for the core-avx2 run.
878define void @test29(i8* nocapture %a, i8* nocapture %b) nounwind {
879vector.ph:
880  br label %vector.body
881
882vector.body:                                      ; preds = %vector.body, %vector.ph
883  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
884  %gep.a = getelementptr inbounds i8* %a, i64 %index
885  %gep.b = getelementptr inbounds i8* %b, i64 %index
; Reinterpret the element addresses as pointers to whole <32 x i8> vectors.
886  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
887  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
888  %load.a = load <32 x i8>* %ptr.a, align 2
889  %load.b = load <32 x i8>* %ptr.b, align 2
; Per lane: a < b (unsigned) ? a : b.
890  %cmp = icmp ult <32 x i8> %load.a, %load.b
891  %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
892  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
; Step by one vector (32 elements); exit once the index reaches 16384.
893  %index.next = add i64 %index, 32
894  %loop = icmp eq i64 %index.next, 16384
895  br i1 %loop, label %for.end, label %vector.body
896
897for.end:                                          ; preds = %vector.body
898  ret void
899
900; AVX2-LABEL: test29:
901; AVX2: vpminub
902}
903
; @test30 - unsigned minimum on <32 x i8>.
; Per lane, keeps a[i] when a[i] <= b[i] (unsigned), otherwise b[i], and
; stores the result back over a. The AVX2 run is checked for vpminub.
define void @test30(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.ptr.a = bitcast i8* %addr.a to <32 x i8>*
  %vec.ptr.b = bitcast i8* %addr.b to <32 x i8>*
  %vec.a = load <32 x i8>* %vec.ptr.a, align 2
  %vec.b = load <32 x i8>* %vec.ptr.b, align 2
  %mask = icmp ule <32 x i8> %vec.a, %vec.b
  %picked = select <32 x i1> %mask, <32 x i8> %vec.a, <32 x i8> %vec.b
  store <32 x i8> %picked, <32 x i8>* %vec.ptr.a, align 2
  ; Advance by one vector (32 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test30:
; AVX2: vpminub
}
929
; @test31 - unsigned maximum on <32 x i8>.
; Per lane, keeps a[i] when a[i] > b[i] (unsigned), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpmaxub.
define void @test31(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.ptr.a = bitcast i8* %addr.a to <32 x i8>*
  %vec.ptr.b = bitcast i8* %addr.b to <32 x i8>*
  %vec.a = load <32 x i8>* %vec.ptr.a, align 2
  %vec.b = load <32 x i8>* %vec.ptr.b, align 2
  %mask = icmp ugt <32 x i8> %vec.a, %vec.b
  %picked = select <32 x i1> %mask, <32 x i8> %vec.a, <32 x i8> %vec.b
  store <32 x i8> %picked, <32 x i8>* %vec.ptr.a, align 2
  ; Advance by one vector (32 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test31:
; AVX2: vpmaxub
}
955
; @test32 - unsigned maximum on <32 x i8>.
; Per lane, keeps a[i] when a[i] >= b[i] (unsigned), otherwise b[i], and
; stores the result back over a. The AVX2 run is checked for vpmaxub.
define void @test32(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.ptr.a = bitcast i8* %addr.a to <32 x i8>*
  %vec.ptr.b = bitcast i8* %addr.b to <32 x i8>*
  %vec.a = load <32 x i8>* %vec.ptr.a, align 2
  %vec.b = load <32 x i8>* %vec.ptr.b, align 2
  %mask = icmp uge <32 x i8> %vec.a, %vec.b
  %picked = select <32 x i1> %mask, <32 x i8> %vec.a, <32 x i8> %vec.b
  store <32 x i8> %picked, <32 x i8>* %vec.ptr.a, align 2
  ; Advance by one vector (32 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test32:
; AVX2: vpmaxub
}
981
; @test33 - signed minimum on <16 x i16>.
; Per lane, keeps a[i] when a[i] < b[i] (signed), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpminsw.
define void @test33(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.ptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vec.ptr.b = bitcast i16* %addr.b to <16 x i16>*
  %vec.a = load <16 x i16>* %vec.ptr.a, align 2
  %vec.b = load <16 x i16>* %vec.ptr.b, align 2
  %mask = icmp slt <16 x i16> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i16> %vec.a, <16 x i16> %vec.b
  store <16 x i16> %picked, <16 x i16>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test33:
; AVX2: vpminsw
}
1007
; @test34 - signed minimum on <16 x i16>.
; Per lane, keeps a[i] when a[i] <= b[i] (signed), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpminsw.
define void @test34(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.ptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vec.ptr.b = bitcast i16* %addr.b to <16 x i16>*
  %vec.a = load <16 x i16>* %vec.ptr.a, align 2
  %vec.b = load <16 x i16>* %vec.ptr.b, align 2
  %mask = icmp sle <16 x i16> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i16> %vec.a, <16 x i16> %vec.b
  store <16 x i16> %picked, <16 x i16>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test34:
; AVX2: vpminsw
}
1033
; @test35 - signed maximum on <16 x i16>.
; Per lane, keeps a[i] when a[i] > b[i] (signed), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpmaxsw.
define void @test35(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.ptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vec.ptr.b = bitcast i16* %addr.b to <16 x i16>*
  %vec.a = load <16 x i16>* %vec.ptr.a, align 2
  %vec.b = load <16 x i16>* %vec.ptr.b, align 2
  %mask = icmp sgt <16 x i16> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i16> %vec.a, <16 x i16> %vec.b
  store <16 x i16> %picked, <16 x i16>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test35:
; AVX2: vpmaxsw
}
1059
; @test36 - signed maximum on <16 x i16>.
; Per lane, keeps a[i] when a[i] >= b[i] (signed), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpmaxsw.
define void @test36(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.ptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vec.ptr.b = bitcast i16* %addr.b to <16 x i16>*
  %vec.a = load <16 x i16>* %vec.ptr.a, align 2
  %vec.b = load <16 x i16>* %vec.ptr.b, align 2
  %mask = icmp sge <16 x i16> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i16> %vec.a, <16 x i16> %vec.b
  store <16 x i16> %picked, <16 x i16>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test36:
; AVX2: vpmaxsw
}
1085
; @test37 - unsigned minimum on <16 x i16>.
; Per lane, keeps a[i] when a[i] < b[i] (unsigned), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpminuw.
define void @test37(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.ptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vec.ptr.b = bitcast i16* %addr.b to <16 x i16>*
  %vec.a = load <16 x i16>* %vec.ptr.a, align 2
  %vec.b = load <16 x i16>* %vec.ptr.b, align 2
  %mask = icmp ult <16 x i16> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i16> %vec.a, <16 x i16> %vec.b
  store <16 x i16> %picked, <16 x i16>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test37:
; AVX2: vpminuw
}
1111
; @test38 - unsigned minimum on <16 x i16>.
; Per lane, keeps a[i] when a[i] <= b[i] (unsigned), otherwise b[i], and
; stores the result back over a. The AVX2 run is checked for vpminuw.
define void @test38(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.ptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vec.ptr.b = bitcast i16* %addr.b to <16 x i16>*
  %vec.a = load <16 x i16>* %vec.ptr.a, align 2
  %vec.b = load <16 x i16>* %vec.ptr.b, align 2
  %mask = icmp ule <16 x i16> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i16> %vec.a, <16 x i16> %vec.b
  store <16 x i16> %picked, <16 x i16>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test38:
; AVX2: vpminuw
}
1137
; @test39 - unsigned maximum on <16 x i16>.
; Per lane, keeps a[i] when a[i] > b[i] (unsigned), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpmaxuw.
define void @test39(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.ptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vec.ptr.b = bitcast i16* %addr.b to <16 x i16>*
  %vec.a = load <16 x i16>* %vec.ptr.a, align 2
  %vec.b = load <16 x i16>* %vec.ptr.b, align 2
  %mask = icmp ugt <16 x i16> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i16> %vec.a, <16 x i16> %vec.b
  store <16 x i16> %picked, <16 x i16>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test39:
; AVX2: vpmaxuw
}
1163
; @test40 - unsigned maximum on <16 x i16>.
; Per lane, keeps a[i] when a[i] >= b[i] (unsigned), otherwise b[i], and
; stores the result back over a. The AVX2 run is checked for vpmaxuw.
define void @test40(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.ptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vec.ptr.b = bitcast i16* %addr.b to <16 x i16>*
  %vec.a = load <16 x i16>* %vec.ptr.a, align 2
  %vec.b = load <16 x i16>* %vec.ptr.b, align 2
  %mask = icmp uge <16 x i16> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i16> %vec.a, <16 x i16> %vec.b
  store <16 x i16> %picked, <16 x i16>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test40:
; AVX2: vpmaxuw
}
1189
; @test41 - signed minimum on <8 x i32>.
; Per lane, keeps a[i] when a[i] < b[i] (signed), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpminsd.
define void @test41(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.ptr.a = bitcast i32* %addr.a to <8 x i32>*
  %vec.ptr.b = bitcast i32* %addr.b to <8 x i32>*
  %vec.a = load <8 x i32>* %vec.ptr.a, align 2
  %vec.b = load <8 x i32>* %vec.ptr.b, align 2
  %mask = icmp slt <8 x i32> %vec.a, %vec.b
  %picked = select <8 x i1> %mask, <8 x i32> %vec.a, <8 x i32> %vec.b
  store <8 x i32> %picked, <8 x i32>* %vec.ptr.a, align 2
  ; Advance by one vector (8 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test41:
; AVX2: vpminsd
}
1215
; @test42 - signed minimum on <8 x i32>.
; Per lane, keeps a[i] when a[i] <= b[i] (signed), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpminsd.
define void @test42(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.ptr.a = bitcast i32* %addr.a to <8 x i32>*
  %vec.ptr.b = bitcast i32* %addr.b to <8 x i32>*
  %vec.a = load <8 x i32>* %vec.ptr.a, align 2
  %vec.b = load <8 x i32>* %vec.ptr.b, align 2
  %mask = icmp sle <8 x i32> %vec.a, %vec.b
  %picked = select <8 x i1> %mask, <8 x i32> %vec.a, <8 x i32> %vec.b
  store <8 x i32> %picked, <8 x i32>* %vec.ptr.a, align 2
  ; Advance by one vector (8 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test42:
; AVX2: vpminsd
}
1241
; @test43 - signed maximum on <8 x i32>.
; Per lane, keeps a[i] when a[i] > b[i] (signed), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpmaxsd.
define void @test43(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.ptr.a = bitcast i32* %addr.a to <8 x i32>*
  %vec.ptr.b = bitcast i32* %addr.b to <8 x i32>*
  %vec.a = load <8 x i32>* %vec.ptr.a, align 2
  %vec.b = load <8 x i32>* %vec.ptr.b, align 2
  %mask = icmp sgt <8 x i32> %vec.a, %vec.b
  %picked = select <8 x i1> %mask, <8 x i32> %vec.a, <8 x i32> %vec.b
  store <8 x i32> %picked, <8 x i32>* %vec.ptr.a, align 2
  ; Advance by one vector (8 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test43:
; AVX2: vpmaxsd
}
1267
; @test44 - signed maximum on <8 x i32>.
; Per lane, keeps a[i] when a[i] >= b[i] (signed), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpmaxsd.
define void @test44(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.ptr.a = bitcast i32* %addr.a to <8 x i32>*
  %vec.ptr.b = bitcast i32* %addr.b to <8 x i32>*
  %vec.a = load <8 x i32>* %vec.ptr.a, align 2
  %vec.b = load <8 x i32>* %vec.ptr.b, align 2
  %mask = icmp sge <8 x i32> %vec.a, %vec.b
  %picked = select <8 x i1> %mask, <8 x i32> %vec.a, <8 x i32> %vec.b
  store <8 x i32> %picked, <8 x i32>* %vec.ptr.a, align 2
  ; Advance by one vector (8 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test44:
; AVX2: vpmaxsd
}
1293
; @test45 - unsigned minimum on <8 x i32>.
; Per lane, keeps a[i] when a[i] < b[i] (unsigned), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpminud.
define void @test45(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.ptr.a = bitcast i32* %addr.a to <8 x i32>*
  %vec.ptr.b = bitcast i32* %addr.b to <8 x i32>*
  %vec.a = load <8 x i32>* %vec.ptr.a, align 2
  %vec.b = load <8 x i32>* %vec.ptr.b, align 2
  %mask = icmp ult <8 x i32> %vec.a, %vec.b
  %picked = select <8 x i1> %mask, <8 x i32> %vec.a, <8 x i32> %vec.b
  store <8 x i32> %picked, <8 x i32>* %vec.ptr.a, align 2
  ; Advance by one vector (8 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test45:
; AVX2: vpminud
}
1319
; @test46 - unsigned minimum on <8 x i32>.
; Per lane, keeps a[i] when a[i] <= b[i] (unsigned), otherwise b[i], and
; stores the result back over a. The AVX2 run is checked for vpminud.
define void @test46(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.ptr.a = bitcast i32* %addr.a to <8 x i32>*
  %vec.ptr.b = bitcast i32* %addr.b to <8 x i32>*
  %vec.a = load <8 x i32>* %vec.ptr.a, align 2
  %vec.b = load <8 x i32>* %vec.ptr.b, align 2
  %mask = icmp ule <8 x i32> %vec.a, %vec.b
  %picked = select <8 x i1> %mask, <8 x i32> %vec.a, <8 x i32> %vec.b
  store <8 x i32> %picked, <8 x i32>* %vec.ptr.a, align 2
  ; Advance by one vector (8 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test46:
; AVX2: vpminud
}
1345
; @test47 - unsigned maximum on <8 x i32>.
; Per lane, keeps a[i] when a[i] > b[i] (unsigned), otherwise b[i], and stores
; the result back over a. The AVX2 run is checked for vpmaxud.
define void @test47(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.ptr.a = bitcast i32* %addr.a to <8 x i32>*
  %vec.ptr.b = bitcast i32* %addr.b to <8 x i32>*
  %vec.a = load <8 x i32>* %vec.ptr.a, align 2
  %vec.b = load <8 x i32>* %vec.ptr.b, align 2
  %mask = icmp ugt <8 x i32> %vec.a, %vec.b
  %picked = select <8 x i1> %mask, <8 x i32> %vec.a, <8 x i32> %vec.b
  store <8 x i32> %picked, <8 x i32>* %vec.ptr.a, align 2
  ; Advance by one vector (8 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test47:
; AVX2: vpmaxud
}
1371
; @test48 - unsigned maximum on <8 x i32>.
; Per lane, keeps a[i] when a[i] >= b[i] (unsigned), otherwise b[i], and
; stores the result back over a. The AVX2 run is checked for vpmaxud.
define void @test48(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vec.ptr.a = bitcast i32* %addr.a to <8 x i32>*
  %vec.ptr.b = bitcast i32* %addr.b to <8 x i32>*
  %vec.a = load <8 x i32>* %vec.ptr.a, align 2
  %vec.b = load <8 x i32>* %vec.ptr.b, align 2
  %mask = icmp uge <8 x i32> %vec.a, %vec.b
  %picked = select <8 x i1> %mask, <8 x i32> %vec.a, <8 x i32> %vec.b
  store <8 x i32> %picked, <8 x i32>* %vec.ptr.a, align 2
  ; Advance by one vector (8 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; AVX2-LABEL: test48:
; AVX2: vpmaxud
}
1397
; @test49 - signed maximum on <16 x i8>, written with swapped select arms.
; Per lane, keeps b[i] when a[i] < b[i] (signed), otherwise a[i], and stores
; the result back over a. The SSE4 run is checked for pmaxsb; the AVX1 and
; AVX2 runs are checked for vpmaxsb.
define void @test49(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.ptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vec.ptr.b = bitcast i8* %addr.b to <16 x i8>*
  %vec.a = load <16 x i8>* %vec.ptr.a, align 2
  %vec.b = load <16 x i8>* %vec.ptr.b, align 2
  %mask = icmp slt <16 x i8> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i8> %vec.b, <16 x i8> %vec.a
  store <16 x i8> %picked, <16 x i8>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; SSE4-LABEL: test49:
; SSE4: pmaxsb

; AVX1-LABEL: test49:
; AVX1: vpmaxsb

; AVX2-LABEL: test49:
; AVX2: vpmaxsb
}
1429
; @test50 - signed maximum on <16 x i8>, written with swapped select arms.
; Per lane, keeps b[i] when a[i] <= b[i] (signed), otherwise a[i], and stores
; the result back over a. The SSE4 run is checked for pmaxsb; the AVX1 and
; AVX2 runs are checked for vpmaxsb.
define void @test50(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.ptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vec.ptr.b = bitcast i8* %addr.b to <16 x i8>*
  %vec.a = load <16 x i8>* %vec.ptr.a, align 2
  %vec.b = load <16 x i8>* %vec.ptr.b, align 2
  %mask = icmp sle <16 x i8> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i8> %vec.b, <16 x i8> %vec.a
  store <16 x i8> %picked, <16 x i8>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; SSE4-LABEL: test50:
; SSE4: pmaxsb

; AVX1-LABEL: test50:
; AVX1: vpmaxsb

; AVX2-LABEL: test50:
; AVX2: vpmaxsb
}
1461
; @test51 - signed minimum on <16 x i8>, written with swapped select arms.
; Per lane, keeps b[i] when a[i] > b[i] (signed), otherwise a[i], and stores
; the result back over a. The SSE4 run is checked for pminsb; the AVX1 and
; AVX2 runs are checked for vpminsb.
define void @test51(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.ptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vec.ptr.b = bitcast i8* %addr.b to <16 x i8>*
  %vec.a = load <16 x i8>* %vec.ptr.a, align 2
  %vec.b = load <16 x i8>* %vec.ptr.b, align 2
  %mask = icmp sgt <16 x i8> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i8> %vec.b, <16 x i8> %vec.a
  store <16 x i8> %picked, <16 x i8>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; SSE4-LABEL: test51:
; SSE4: pminsb

; AVX1-LABEL: test51:
; AVX1: vpminsb

; AVX2-LABEL: test51:
; AVX2: vpminsb
}
1493
; @test52 - signed minimum on <16 x i8>, written with swapped select arms.
; Per lane, keeps b[i] when a[i] >= b[i] (signed), otherwise a[i], and stores
; the result back over a. The SSE4 run is checked for pminsb; the AVX1 and
; AVX2 runs are checked for vpminsb.
define void @test52(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.ptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vec.ptr.b = bitcast i8* %addr.b to <16 x i8>*
  %vec.a = load <16 x i8>* %vec.ptr.a, align 2
  %vec.b = load <16 x i8>* %vec.ptr.b, align 2
  %mask = icmp sge <16 x i8> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i8> %vec.b, <16 x i8> %vec.a
  store <16 x i8> %picked, <16 x i8>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; SSE4-LABEL: test52:
; SSE4: pminsb

; AVX1-LABEL: test52:
; AVX1: vpminsb

; AVX2-LABEL: test52:
; AVX2: vpminsb
}
1525
; @test53 - unsigned maximum on <16 x i8>, written with swapped select arms.
; Per lane, keeps b[i] when a[i] < b[i] (unsigned), otherwise a[i], and stores
; the result back over a. The SSE2 run is checked for pmaxub; the AVX1 and
; AVX2 runs are checked for vpmaxub.
define void @test53(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.ptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vec.ptr.b = bitcast i8* %addr.b to <16 x i8>*
  %vec.a = load <16 x i8>* %vec.ptr.a, align 2
  %vec.b = load <16 x i8>* %vec.ptr.b, align 2
  %mask = icmp ult <16 x i8> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i8> %vec.b, <16 x i8> %vec.a
  store <16 x i8> %picked, <16 x i8>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; SSE2-LABEL: test53:
; SSE2: pmaxub

; AVX1-LABEL: test53:
; AVX1: vpmaxub

; AVX2-LABEL: test53:
; AVX2: vpmaxub
}
1557
; @test54 - unsigned maximum on <16 x i8>, written with swapped select arms.
; Per lane, keeps b[i] when a[i] <= b[i] (unsigned), otherwise a[i], and
; stores the result back over a. The SSE2 run is checked for pmaxub; the AVX1
; and AVX2 runs are checked for vpmaxub.
define void @test54(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.ptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vec.ptr.b = bitcast i8* %addr.b to <16 x i8>*
  %vec.a = load <16 x i8>* %vec.ptr.a, align 2
  %vec.b = load <16 x i8>* %vec.ptr.b, align 2
  %mask = icmp ule <16 x i8> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i8> %vec.b, <16 x i8> %vec.a
  store <16 x i8> %picked, <16 x i8>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; SSE2-LABEL: test54:
; SSE2: pmaxub

; AVX1-LABEL: test54:
; AVX1: vpmaxub

; AVX2-LABEL: test54:
; AVX2: vpmaxub
}
1589
; @test55 - unsigned minimum on <16 x i8>, written with swapped select arms.
; Per lane, keeps b[i] when a[i] > b[i] (unsigned), otherwise a[i], and stores
; the result back over a. The SSE2 run is checked for pminub; the AVX1 and
; AVX2 runs are checked for vpminub.
define void @test55(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.ptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vec.ptr.b = bitcast i8* %addr.b to <16 x i8>*
  %vec.a = load <16 x i8>* %vec.ptr.a, align 2
  %vec.b = load <16 x i8>* %vec.ptr.b, align 2
  %mask = icmp ugt <16 x i8> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i8> %vec.b, <16 x i8> %vec.a
  store <16 x i8> %picked, <16 x i8>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; SSE2-LABEL: test55:
; SSE2: pminub

; AVX1-LABEL: test55:
; AVX1: vpminub

; AVX2-LABEL: test55:
; AVX2: vpminub
}
1621
; @test56 - unsigned minimum on <16 x i8>, written with swapped select arms.
; Per lane, keeps b[i] when a[i] >= b[i] (unsigned), otherwise a[i], and
; stores the result back over a. The SSE2 run is checked for pminub; the AVX1
; and AVX2 runs are checked for vpminub.
define void @test56(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vec.ptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vec.ptr.b = bitcast i8* %addr.b to <16 x i8>*
  %vec.a = load <16 x i8>* %vec.ptr.a, align 2
  %vec.b = load <16 x i8>* %vec.ptr.b, align 2
  %mask = icmp uge <16 x i8> %vec.a, %vec.b
  %picked = select <16 x i1> %mask, <16 x i8> %vec.b, <16 x i8> %vec.a
  store <16 x i8> %picked, <16 x i8>* %vec.ptr.a, align 2
  ; Advance by one vector (16 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; SSE2-LABEL: test56:
; SSE2: pminub

; AVX1-LABEL: test56:
; AVX1: vpminub

; AVX2-LABEL: test56:
; AVX2: vpminub
}
1653
; @test57 - signed maximum on <8 x i16>, written with swapped select arms.
; Per lane, keeps b[i] when a[i] < b[i] (signed), otherwise a[i], and stores
; the result back over a. The SSE2 run is checked for pmaxsw; the AVX1 and
; AVX2 runs are checked for vpmaxsw.
define void @test57(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.ptr.a = bitcast i16* %addr.a to <8 x i16>*
  %vec.ptr.b = bitcast i16* %addr.b to <8 x i16>*
  %vec.a = load <8 x i16>* %vec.ptr.a, align 2
  %vec.b = load <8 x i16>* %vec.ptr.b, align 2
  %mask = icmp slt <8 x i16> %vec.a, %vec.b
  %picked = select <8 x i1> %mask, <8 x i16> %vec.b, <8 x i16> %vec.a
  store <8 x i16> %picked, <8 x i16>* %vec.ptr.a, align 2
  ; Advance by one vector (8 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; SSE2-LABEL: test57:
; SSE2: pmaxsw

; AVX1-LABEL: test57:
; AVX1: vpmaxsw

; AVX2-LABEL: test57:
; AVX2: vpmaxsw
}
1685
; @test58 - signed maximum on <8 x i16>, written with swapped select arms.
; Per lane, keeps b[i] when a[i] <= b[i] (signed), otherwise a[i], and stores
; the result back over a. The SSE2 run is checked for pmaxsw; the AVX1 and
; AVX2 runs are checked for vpmaxsw.
define void @test58(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop.body

loop.body:                                        ; preds = %loop.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vec.ptr.a = bitcast i16* %addr.a to <8 x i16>*
  %vec.ptr.b = bitcast i16* %addr.b to <8 x i16>*
  %vec.a = load <8 x i16>* %vec.ptr.a, align 2
  %vec.b = load <8 x i16>* %vec.ptr.b, align 2
  %mask = icmp sle <8 x i16> %vec.a, %vec.b
  %picked = select <8 x i1> %mask, <8 x i16> %vec.b, <8 x i16> %vec.a
  store <8 x i16> %picked, <8 x i16>* %vec.ptr.a, align 2
  ; Advance by one vector (8 elements); stop once 16384 elements are done.
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop.body

exit:                                             ; preds = %loop.body
  ret void

; SSE2-LABEL: test58:
; SSE2: pmaxsw

; AVX1-LABEL: test58:
; AVX1: vpmaxsw

; AVX2-LABEL: test58:
; AVX2: vpmaxsw
}
1717
; Signed min over <8 x i16>: each iteration stores select(a >s b, b, a) back
; through %a. pminsw is an SSE2 instruction, so all four run lines are expected
; to select it (spelled vpminsw on the AVX runs).
define void @test59(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  ; View the i16 element pointers as pointers to one 8-lane vector.
  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
  %load.a = load <8 x i16>* %ptr.a, align 2
  %load.b = load <8 x i16>* %ptr.b, align 2
  ; a > b (signed) picks b, otherwise a: the lane-wise signed minimum.
  %cmp = icmp sgt <8 x i16> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  ; Step by the vector width (8 lanes) until 16384 elements are processed.
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2-LABEL: test59:
; SSE2: pminsw

; SSE4-LABEL: test59:
; SSE4: pminsw

; AVX1-LABEL: test59:
; AVX1: vpminsw

; AVX2-LABEL: test59:
; AVX2: vpminsw
}
1749
; Signed min over <8 x i16>, written with a non-strict compare:
; select(a >=s b, b, a). pminsw is an SSE2 instruction, so all four run lines
; are expected to select it (spelled vpminsw on the AVX runs).
define void @test60(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  ; View the i16 element pointers as pointers to one 8-lane vector.
  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
  %load.a = load <8 x i16>* %ptr.a, align 2
  %load.b = load <8 x i16>* %ptr.b, align 2
  ; a >= b (signed) picks b, otherwise a: the lane-wise signed minimum.
  %cmp = icmp sge <8 x i16> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  ; Step by the vector width (8 lanes) until 16384 elements are processed.
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2-LABEL: test60:
; SSE2: pminsw

; SSE4-LABEL: test60:
; SSE4: pminsw

; AVX1-LABEL: test60:
; AVX1: vpminsw

; AVX2-LABEL: test60:
; AVX2: vpminsw
}
1781
; Unsigned max over <8 x i16>: every iteration writes select(a <u b, b, a)
; back through %a. The checks below require pmaxuw (vpmaxuw on the AVX runs).
define void @test61(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %i
  %elt.b = getelementptr inbounds i16* %b, i64 %i
  %vptr.a = bitcast i16* %elt.a to <8 x i16>*
  %vptr.b = bitcast i16* %elt.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  ; Lanes where a is below b take b; the rest keep a.
  %a.ult.b = icmp ult <8 x i16> %va, %vb
  %max = select <8 x i1> %a.ult.b, <8 x i16> %vb, <8 x i16> %va
  store <8 x i16> %max, <8 x i16>* %vptr.a, align 2
  ; Advance by one vector (8 lanes); stop at element 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test61:
; SSE4: pmaxuw

; AVX1-LABEL: test61:
; AVX1: vpmaxuw

; AVX2-LABEL: test61:
; AVX2: vpmaxuw
}
1813
; Unsigned max over <8 x i16> using a non-strict compare:
; select(a <=u b, b, a). The checks below require pmaxuw (vpmaxuw on the AVX
; runs).
define void @test62(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %i
  %elt.b = getelementptr inbounds i16* %b, i64 %i
  %vptr.a = bitcast i16* %elt.a to <8 x i16>*
  %vptr.b = bitcast i16* %elt.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  ; Lanes where a is below or equal to b take b; the rest keep a.
  %a.ule.b = icmp ule <8 x i16> %va, %vb
  %max = select <8 x i1> %a.ule.b, <8 x i16> %vb, <8 x i16> %va
  store <8 x i16> %max, <8 x i16>* %vptr.a, align 2
  ; Advance by one vector (8 lanes); stop at element 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test62:
; SSE4: pmaxuw

; AVX1-LABEL: test62:
; AVX1: vpmaxuw

; AVX2-LABEL: test62:
; AVX2: vpmaxuw
}
1845
; Unsigned min over <8 x i16>: every iteration writes select(a >u b, b, a)
; back through %a. The checks below require pminuw (vpminuw on the AVX runs).
define void @test63(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %i
  %elt.b = getelementptr inbounds i16* %b, i64 %i
  %vptr.a = bitcast i16* %elt.a to <8 x i16>*
  %vptr.b = bitcast i16* %elt.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  ; Lanes where a is above b take b; the rest keep a.
  %a.ugt.b = icmp ugt <8 x i16> %va, %vb
  %min = select <8 x i1> %a.ugt.b, <8 x i16> %vb, <8 x i16> %va
  store <8 x i16> %min, <8 x i16>* %vptr.a, align 2
  ; Advance by one vector (8 lanes); stop at element 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test63:
; SSE4: pminuw

; AVX1-LABEL: test63:
; AVX1: vpminuw

; AVX2-LABEL: test63:
; AVX2: vpminuw
}
1877
; Unsigned min over <8 x i16> using a non-strict compare:
; select(a >=u b, b, a). The checks below require pminuw (vpminuw on the AVX
; runs).
define void @test64(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %i
  %elt.b = getelementptr inbounds i16* %b, i64 %i
  %vptr.a = bitcast i16* %elt.a to <8 x i16>*
  %vptr.b = bitcast i16* %elt.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  ; Lanes where a is above or equal to b take b; the rest keep a.
  %a.uge.b = icmp uge <8 x i16> %va, %vb
  %min = select <8 x i1> %a.uge.b, <8 x i16> %vb, <8 x i16> %va
  store <8 x i16> %min, <8 x i16>* %vptr.a, align 2
  ; Advance by one vector (8 lanes); stop at element 16384.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test64:
; SSE4: pminuw

; AVX1-LABEL: test64:
; AVX1: vpminuw

; AVX2-LABEL: test64:
; AVX2: vpminuw
}
1909
; Signed max over <4 x i32>: every iteration writes select(a <s b, b, a)
; back through %a. The checks below require pmaxsd (vpmaxsd on the AVX runs).
define void @test65(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %i
  %elt.b = getelementptr inbounds i32* %b, i64 %i
  %vptr.a = bitcast i32* %elt.a to <4 x i32>*
  %vptr.b = bitcast i32* %elt.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  ; Lanes where a is less than b take b; the rest keep a.
  %a.slt.b = icmp slt <4 x i32> %va, %vb
  %max = select <4 x i1> %a.slt.b, <4 x i32> %vb, <4 x i32> %va
  store <4 x i32> %max, <4 x i32>* %vptr.a, align 2
  ; Advance by one vector (4 lanes); stop at element 16384.
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test65:
; SSE4: pmaxsd

; AVX1-LABEL: test65:
; AVX1: vpmaxsd

; AVX2-LABEL: test65:
; AVX2: vpmaxsd
}
1941
; Signed max over <4 x i32> using a non-strict compare:
; select(a <=s b, b, a). The checks below require pmaxsd (vpmaxsd on the AVX
; runs).
define void @test66(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %i
  %elt.b = getelementptr inbounds i32* %b, i64 %i
  %vptr.a = bitcast i32* %elt.a to <4 x i32>*
  %vptr.b = bitcast i32* %elt.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  ; Lanes where a is less than or equal to b take b; the rest keep a.
  %a.sle.b = icmp sle <4 x i32> %va, %vb
  %max = select <4 x i1> %a.sle.b, <4 x i32> %vb, <4 x i32> %va
  store <4 x i32> %max, <4 x i32>* %vptr.a, align 2
  ; Advance by one vector (4 lanes); stop at element 16384.
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test66:
; SSE4: pmaxsd

; AVX1-LABEL: test66:
; AVX1: vpmaxsd

; AVX2-LABEL: test66:
; AVX2: vpmaxsd
}
1973
; Signed min over <4 x i32>: every iteration writes select(a >s b, b, a)
; back through %a. The checks below require pminsd (vpminsd on the AVX runs).
define void @test67(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %i
  %elt.b = getelementptr inbounds i32* %b, i64 %i
  %vptr.a = bitcast i32* %elt.a to <4 x i32>*
  %vptr.b = bitcast i32* %elt.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  ; Lanes where a is greater than b take b; the rest keep a.
  %a.sgt.b = icmp sgt <4 x i32> %va, %vb
  %min = select <4 x i1> %a.sgt.b, <4 x i32> %vb, <4 x i32> %va
  store <4 x i32> %min, <4 x i32>* %vptr.a, align 2
  ; Advance by one vector (4 lanes); stop at element 16384.
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test67:
; SSE4: pminsd

; AVX1-LABEL: test67:
; AVX1: vpminsd

; AVX2-LABEL: test67:
; AVX2: vpminsd
}
2005
; Signed min over <4 x i32> using a non-strict compare:
; select(a >=s b, b, a). The checks below require pminsd (vpminsd on the AVX
; runs).
define void @test68(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %i
  %elt.b = getelementptr inbounds i32* %b, i64 %i
  %vptr.a = bitcast i32* %elt.a to <4 x i32>*
  %vptr.b = bitcast i32* %elt.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  ; Lanes where a is greater than or equal to b take b; the rest keep a.
  %a.sge.b = icmp sge <4 x i32> %va, %vb
  %min = select <4 x i1> %a.sge.b, <4 x i32> %vb, <4 x i32> %va
  store <4 x i32> %min, <4 x i32>* %vptr.a, align 2
  ; Advance by one vector (4 lanes); stop at element 16384.
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test68:
; SSE4: pminsd

; AVX1-LABEL: test68:
; AVX1: vpminsd

; AVX2-LABEL: test68:
; AVX2: vpminsd
}
2037
; Unsigned max over <4 x i32>: every iteration writes select(a <u b, b, a)
; back through %a. The checks below require pmaxud (vpmaxud on the AVX runs).
define void @test69(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %i
  %elt.b = getelementptr inbounds i32* %b, i64 %i
  %vptr.a = bitcast i32* %elt.a to <4 x i32>*
  %vptr.b = bitcast i32* %elt.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  ; Lanes where a is below b take b; the rest keep a.
  %a.ult.b = icmp ult <4 x i32> %va, %vb
  %max = select <4 x i1> %a.ult.b, <4 x i32> %vb, <4 x i32> %va
  store <4 x i32> %max, <4 x i32>* %vptr.a, align 2
  ; Advance by one vector (4 lanes); stop at element 16384.
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test69:
; SSE4: pmaxud

; AVX1-LABEL: test69:
; AVX1: vpmaxud

; AVX2-LABEL: test69:
; AVX2: vpmaxud
}
2069
; Unsigned max over <4 x i32> using a non-strict compare:
; select(a <=u b, b, a). The checks below require pmaxud (vpmaxud on the AVX
; runs).
define void @test70(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %i
  %elt.b = getelementptr inbounds i32* %b, i64 %i
  %vptr.a = bitcast i32* %elt.a to <4 x i32>*
  %vptr.b = bitcast i32* %elt.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  ; Lanes where a is below or equal to b take b; the rest keep a.
  %a.ule.b = icmp ule <4 x i32> %va, %vb
  %max = select <4 x i1> %a.ule.b, <4 x i32> %vb, <4 x i32> %va
  store <4 x i32> %max, <4 x i32>* %vptr.a, align 2
  ; Advance by one vector (4 lanes); stop at element 16384.
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test70:
; SSE4: pmaxud

; AVX1-LABEL: test70:
; AVX1: vpmaxud

; AVX2-LABEL: test70:
; AVX2: vpmaxud
}
2101
; Unsigned min over <4 x i32>: every iteration writes select(a >u b, b, a)
; back through %a. The checks below require pminud (vpminud on the AVX runs).
define void @test71(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %i
  %elt.b = getelementptr inbounds i32* %b, i64 %i
  %vptr.a = bitcast i32* %elt.a to <4 x i32>*
  %vptr.b = bitcast i32* %elt.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  ; Lanes where a is above b take b; the rest keep a.
  %a.ugt.b = icmp ugt <4 x i32> %va, %vb
  %min = select <4 x i1> %a.ugt.b, <4 x i32> %vb, <4 x i32> %va
  store <4 x i32> %min, <4 x i32>* %vptr.a, align 2
  ; Advance by one vector (4 lanes); stop at element 16384.
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test71:
; SSE4: pminud

; AVX1-LABEL: test71:
; AVX1: vpminud

; AVX2-LABEL: test71:
; AVX2: vpminud
}
2133
; Unsigned min over <4 x i32> using a non-strict compare:
; select(a >=u b, b, a). The checks below require pminud (vpminud on the AVX
; runs).
define void @test72(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i32* %a, i64 %i
  %elt.b = getelementptr inbounds i32* %b, i64 %i
  %vptr.a = bitcast i32* %elt.a to <4 x i32>*
  %vptr.b = bitcast i32* %elt.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  ; Lanes where a is above or equal to b take b; the rest keep a.
  %a.uge.b = icmp uge <4 x i32> %va, %vb
  %min = select <4 x i1> %a.uge.b, <4 x i32> %vb, <4 x i32> %va
  store <4 x i32> %min, <4 x i32>* %vptr.a, align 2
  ; Advance by one vector (4 lanes); stop at element 16384.
  %i.next = add i64 %i, 4
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test72:
; SSE4: pminud

; AVX1-LABEL: test72:
; AVX1: vpminud

; AVX2-LABEL: test72:
; AVX2: vpminud
}
2165
; Signed max over the 256-bit type <32 x i8>: select(a <s b, b, a).
; Only the AVX2 run is checked; it must contain vpmaxsb.
define void @test73(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %i
  %elt.b = getelementptr inbounds i8* %b, i64 %i
  %vptr.a = bitcast i8* %elt.a to <32 x i8>*
  %vptr.b = bitcast i8* %elt.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  ; Lanes where a is less than b take b; the rest keep a.
  %a.slt.b = icmp slt <32 x i8> %va, %vb
  %max = select <32 x i1> %a.slt.b, <32 x i8> %vb, <32 x i8> %va
  store <32 x i8> %max, <32 x i8>* %vptr.a, align 2
  ; Advance by one vector (32 lanes); stop at element 16384.
  %i.next = add i64 %i, 32
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test73:
; AVX2: vpmaxsb
}
2191
; Signed max over the 256-bit type <32 x i8>, non-strict compare:
; select(a <=s b, b, a). Only the AVX2 run is checked; it must contain vpmaxsb.
define void @test74(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %i
  %elt.b = getelementptr inbounds i8* %b, i64 %i
  %vptr.a = bitcast i8* %elt.a to <32 x i8>*
  %vptr.b = bitcast i8* %elt.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  ; Lanes where a is less than or equal to b take b; the rest keep a.
  %a.sle.b = icmp sle <32 x i8> %va, %vb
  %max = select <32 x i1> %a.sle.b, <32 x i8> %vb, <32 x i8> %va
  store <32 x i8> %max, <32 x i8>* %vptr.a, align 2
  ; Advance by one vector (32 lanes); stop at element 16384.
  %i.next = add i64 %i, 32
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test74:
; AVX2: vpmaxsb
}
2217
; Signed min over the 256-bit type <32 x i8>: select(a >s b, b, a).
; Only the AVX2 run is checked; it must contain vpminsb.
define void @test75(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %i
  %elt.b = getelementptr inbounds i8* %b, i64 %i
  %vptr.a = bitcast i8* %elt.a to <32 x i8>*
  %vptr.b = bitcast i8* %elt.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  ; Lanes where a is greater than b take b; the rest keep a.
  %a.sgt.b = icmp sgt <32 x i8> %va, %vb
  %min = select <32 x i1> %a.sgt.b, <32 x i8> %vb, <32 x i8> %va
  store <32 x i8> %min, <32 x i8>* %vptr.a, align 2
  ; Advance by one vector (32 lanes); stop at element 16384.
  %i.next = add i64 %i, 32
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test75:
; AVX2: vpminsb
}
2243
; Signed min over the 256-bit type <32 x i8>, non-strict compare:
; select(a >=s b, b, a). Only the AVX2 run is checked; it must contain vpminsb.
define void @test76(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %i
  %elt.b = getelementptr inbounds i8* %b, i64 %i
  %vptr.a = bitcast i8* %elt.a to <32 x i8>*
  %vptr.b = bitcast i8* %elt.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  ; Lanes where a is greater than or equal to b take b; the rest keep a.
  %a.sge.b = icmp sge <32 x i8> %va, %vb
  %min = select <32 x i1> %a.sge.b, <32 x i8> %vb, <32 x i8> %va
  store <32 x i8> %min, <32 x i8>* %vptr.a, align 2
  ; Advance by one vector (32 lanes); stop at element 16384.
  %i.next = add i64 %i, 32
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test76:
; AVX2: vpminsb
}
2269
; Unsigned max over the 256-bit type <32 x i8>: select(a <u b, b, a).
; Only the AVX2 run is checked; it must contain vpmaxub.
define void @test77(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %i
  %elt.b = getelementptr inbounds i8* %b, i64 %i
  %vptr.a = bitcast i8* %elt.a to <32 x i8>*
  %vptr.b = bitcast i8* %elt.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  ; Lanes where a is below b take b; the rest keep a.
  %a.ult.b = icmp ult <32 x i8> %va, %vb
  %max = select <32 x i1> %a.ult.b, <32 x i8> %vb, <32 x i8> %va
  store <32 x i8> %max, <32 x i8>* %vptr.a, align 2
  ; Advance by one vector (32 lanes); stop at element 16384.
  %i.next = add i64 %i, 32
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test77:
; AVX2: vpmaxub
}
2295
; Unsigned max over the 256-bit type <32 x i8>, non-strict compare:
; select(a <=u b, b, a). Only the AVX2 run is checked; it must contain vpmaxub.
define void @test78(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %i
  %elt.b = getelementptr inbounds i8* %b, i64 %i
  %vptr.a = bitcast i8* %elt.a to <32 x i8>*
  %vptr.b = bitcast i8* %elt.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  ; Lanes where a is below or equal to b take b; the rest keep a.
  %a.ule.b = icmp ule <32 x i8> %va, %vb
  %max = select <32 x i1> %a.ule.b, <32 x i8> %vb, <32 x i8> %va
  store <32 x i8> %max, <32 x i8>* %vptr.a, align 2
  ; Advance by one vector (32 lanes); stop at element 16384.
  %i.next = add i64 %i, 32
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test78:
; AVX2: vpmaxub
}
2321
; Unsigned min over the 256-bit type <32 x i8>: select(a >u b, b, a).
; Only the AVX2 run is checked; it must contain vpminub.
define void @test79(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %i
  %elt.b = getelementptr inbounds i8* %b, i64 %i
  %vptr.a = bitcast i8* %elt.a to <32 x i8>*
  %vptr.b = bitcast i8* %elt.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  ; Lanes where a is above b take b; the rest keep a.
  %a.ugt.b = icmp ugt <32 x i8> %va, %vb
  %min = select <32 x i1> %a.ugt.b, <32 x i8> %vb, <32 x i8> %va
  store <32 x i8> %min, <32 x i8>* %vptr.a, align 2
  ; Advance by one vector (32 lanes); stop at element 16384.
  %i.next = add i64 %i, 32
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test79:
; AVX2: vpminub
}
2347
; Unsigned min over the 256-bit type <32 x i8>, non-strict compare:
; select(a >=u b, b, a). Only the AVX2 run is checked; it must contain vpminub.
define void @test80(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i8* %a, i64 %i
  %elt.b = getelementptr inbounds i8* %b, i64 %i
  %vptr.a = bitcast i8* %elt.a to <32 x i8>*
  %vptr.b = bitcast i8* %elt.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  ; Lanes where a is above or equal to b take b; the rest keep a.
  %a.uge.b = icmp uge <32 x i8> %va, %vb
  %min = select <32 x i1> %a.uge.b, <32 x i8> %vb, <32 x i8> %va
  store <32 x i8> %min, <32 x i8>* %vptr.a, align 2
  ; Advance by one vector (32 lanes); stop at element 16384.
  %i.next = add i64 %i, 32
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test80:
; AVX2: vpminub
}
2373
; Signed max over the 256-bit type <16 x i16>: select(a <s b, b, a).
; Only the AVX2 run is checked; it must contain vpmaxsw.
define void @test81(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %i
  %elt.b = getelementptr inbounds i16* %b, i64 %i
  %vptr.a = bitcast i16* %elt.a to <16 x i16>*
  %vptr.b = bitcast i16* %elt.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  ; Lanes where a is less than b take b; the rest keep a.
  %a.slt.b = icmp slt <16 x i16> %va, %vb
  %max = select <16 x i1> %a.slt.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %max, <16 x i16>* %vptr.a, align 2
  ; Advance by one vector (16 lanes); stop at element 16384.
  %i.next = add i64 %i, 16
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test81:
; AVX2: vpmaxsw
}
2399
; Signed max over the 256-bit type <16 x i16>, non-strict compare:
; select(a <=s b, b, a). Only the AVX2 run is checked; it must contain vpmaxsw.
define void @test82(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %i
  %elt.b = getelementptr inbounds i16* %b, i64 %i
  %vptr.a = bitcast i16* %elt.a to <16 x i16>*
  %vptr.b = bitcast i16* %elt.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  ; Lanes where a is less than or equal to b take b; the rest keep a.
  %a.sle.b = icmp sle <16 x i16> %va, %vb
  %max = select <16 x i1> %a.sle.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %max, <16 x i16>* %vptr.a, align 2
  ; Advance by one vector (16 lanes); stop at element 16384.
  %i.next = add i64 %i, 16
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test82:
; AVX2: vpmaxsw
}
2425
; Signed min over the 256-bit type <16 x i16>: select(a >s b, b, a).
; Only the AVX2 run is checked; it must contain vpminsw.
define void @test83(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %i
  %elt.b = getelementptr inbounds i16* %b, i64 %i
  %vptr.a = bitcast i16* %elt.a to <16 x i16>*
  %vptr.b = bitcast i16* %elt.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  ; Lanes where a is greater than b take b; the rest keep a.
  %a.sgt.b = icmp sgt <16 x i16> %va, %vb
  %min = select <16 x i1> %a.sgt.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %min, <16 x i16>* %vptr.a, align 2
  ; Advance by one vector (16 lanes); stop at element 16384.
  %i.next = add i64 %i, 16
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test83:
; AVX2: vpminsw
}
2451
; Signed min over the 256-bit type <16 x i16>, non-strict compare:
; select(a >=s b, b, a). Only the AVX2 run is checked; it must contain vpminsw.
define void @test84(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %i
  %elt.b = getelementptr inbounds i16* %b, i64 %i
  %vptr.a = bitcast i16* %elt.a to <16 x i16>*
  %vptr.b = bitcast i16* %elt.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  ; Lanes where a is greater than or equal to b take b; the rest keep a.
  %a.sge.b = icmp sge <16 x i16> %va, %vb
  %min = select <16 x i1> %a.sge.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %min, <16 x i16>* %vptr.a, align 2
  ; Advance by one vector (16 lanes); stop at element 16384.
  %i.next = add i64 %i, 16
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test84:
; AVX2: vpminsw
}
2477
; Unsigned max over the 256-bit type <16 x i16>: select(a <u b, b, a).
; Only the AVX2 run is checked; it must contain vpmaxuw.
define void @test85(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %i
  %elt.b = getelementptr inbounds i16* %b, i64 %i
  %vptr.a = bitcast i16* %elt.a to <16 x i16>*
  %vptr.b = bitcast i16* %elt.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  ; Lanes where a is below b take b; the rest keep a.
  %a.ult.b = icmp ult <16 x i16> %va, %vb
  %max = select <16 x i1> %a.ult.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %max, <16 x i16>* %vptr.a, align 2
  ; Advance by one vector (16 lanes); stop at element 16384.
  %i.next = add i64 %i, 16
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test85:
; AVX2: vpmaxuw
}
2503
; Unsigned max over the 256-bit type <16 x i16>, non-strict compare:
; select(a <=u b, b, a). Only the AVX2 run is checked; it must contain vpmaxuw.
define void @test86(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %i
  %elt.b = getelementptr inbounds i16* %b, i64 %i
  %vptr.a = bitcast i16* %elt.a to <16 x i16>*
  %vptr.b = bitcast i16* %elt.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  ; Lanes where a is below or equal to b take b; the rest keep a.
  %a.ule.b = icmp ule <16 x i16> %va, %vb
  %max = select <16 x i1> %a.ule.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %max, <16 x i16>* %vptr.a, align 2
  ; Advance by one vector (16 lanes); stop at element 16384.
  %i.next = add i64 %i, 16
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test86:
; AVX2: vpmaxuw
}
2529
; Unsigned min over the 256-bit type <16 x i16>: select(a >u b, b, a).
; Only the AVX2 run is checked; it must contain vpminuw.
define void @test87(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.a = getelementptr inbounds i16* %a, i64 %i
  %elt.b = getelementptr inbounds i16* %b, i64 %i
  %vptr.a = bitcast i16* %elt.a to <16 x i16>*
  %vptr.b = bitcast i16* %elt.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  ; Lanes where a is above b take b; the rest keep a.
  %a.ugt.b = icmp ugt <16 x i16> %va, %vb
  %min = select <16 x i1> %a.ugt.b, <16 x i16> %vb, <16 x i16> %va
  store <16 x i16> %min, <16 x i16>* %vptr.a, align 2
  ; Advance by one vector (16 lanes); stop at element 16384.
  %i.next = add i64 %i, 16
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test87:
; AVX2: vpminuw
}
2555
; test88: per-lane unsigned minimum of two i16 arrays, written back through %a.
; Each trip handles one <16 x i16> chunk; the compare is "a uge b" and the
; select returns b when it holds, so the smaller (unsigned) lane survives.
define void @test88(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Vector view of the current chunk of %a (also the store destination).
  %a.elt = getelementptr inbounds i16* %a, i64 %i
  %a.vec = bitcast i16* %a.elt to <16 x i16>*
  ; Vector view of the matching chunk of %b.
  %b.elt = getelementptr inbounds i16* %b, i64 %i
  %b.vec = bitcast i16* %b.elt to <16 x i16>*
  %lhs = load <16 x i16>* %a.vec, align 2
  %rhs = load <16 x i16>* %b.vec, align 2
  ; lhs >= rhs (unsigned) ? rhs : lhs
  %ge = icmp uge <16 x i16> %lhs, %rhs
  %umin = select <16 x i1> %ge, <16 x i16> %rhs, <16 x i16> %lhs
  store <16 x i16> %umin, <16 x i16>* %a.vec, align 2
  ; Advance by the 16 lanes just processed; leave once index 16384 is reached.
  %i.next = add i64 %i, 16
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test88:
; AVX2: vpminuw
}
2581
; test89: per-lane signed maximum of two i32 arrays, written back through %a.
; Each trip handles one <8 x i32> chunk; the compare is "a slt b" and the
; select returns b when it holds, so the larger (signed) lane survives.
define void @test89(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Vector view of the current chunk of %a (also the store destination).
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <8 x i32>*
  ; Vector view of the matching chunk of %b.
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <8 x i32>*
  %lhs = load <8 x i32>* %a.vec, align 2
  %rhs = load <8 x i32>* %b.vec, align 2
  ; lhs < rhs (signed) ? rhs : lhs
  %lt = icmp slt <8 x i32> %lhs, %rhs
  %smax = select <8 x i1> %lt, <8 x i32> %rhs, <8 x i32> %lhs
  store <8 x i32> %smax, <8 x i32>* %a.vec, align 2
  ; Advance by the 8 lanes just processed; leave once index 16384 is reached.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test89:
; AVX2: vpmaxsd
}
2607
; test90: per-lane signed maximum of two i32 arrays, written back through %a.
; Each trip handles one <8 x i32> chunk; the compare is "a sle b" and the
; select returns b when it holds, so the larger (signed) lane survives.
define void @test90(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Vector view of the current chunk of %a (also the store destination).
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <8 x i32>*
  ; Vector view of the matching chunk of %b.
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <8 x i32>*
  %lhs = load <8 x i32>* %a.vec, align 2
  %rhs = load <8 x i32>* %b.vec, align 2
  ; lhs <= rhs (signed) ? rhs : lhs
  %le = icmp sle <8 x i32> %lhs, %rhs
  %smax = select <8 x i1> %le, <8 x i32> %rhs, <8 x i32> %lhs
  store <8 x i32> %smax, <8 x i32>* %a.vec, align 2
  ; Advance by the 8 lanes just processed; leave once index 16384 is reached.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test90:
; AVX2: vpmaxsd
}
2633
; test91: per-lane signed minimum of two i32 arrays, written back through %a.
; Each trip handles one <8 x i32> chunk; the compare is "a sgt b" and the
; select returns b when it holds, so the smaller (signed) lane survives.
define void @test91(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Vector view of the current chunk of %a (also the store destination).
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <8 x i32>*
  ; Vector view of the matching chunk of %b.
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <8 x i32>*
  %lhs = load <8 x i32>* %a.vec, align 2
  %rhs = load <8 x i32>* %b.vec, align 2
  ; lhs > rhs (signed) ? rhs : lhs
  %gt = icmp sgt <8 x i32> %lhs, %rhs
  %smin = select <8 x i1> %gt, <8 x i32> %rhs, <8 x i32> %lhs
  store <8 x i32> %smin, <8 x i32>* %a.vec, align 2
  ; Advance by the 8 lanes just processed; leave once index 16384 is reached.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test91:
; AVX2: vpminsd
}
2659
; test92: per-lane signed minimum of two i32 arrays, written back through %a.
; Each trip handles one <8 x i32> chunk; the compare is "a sge b" and the
; select returns b when it holds, so the smaller (signed) lane survives.
define void @test92(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Vector view of the current chunk of %a (also the store destination).
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <8 x i32>*
  ; Vector view of the matching chunk of %b.
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <8 x i32>*
  %lhs = load <8 x i32>* %a.vec, align 2
  %rhs = load <8 x i32>* %b.vec, align 2
  ; lhs >= rhs (signed) ? rhs : lhs
  %ge = icmp sge <8 x i32> %lhs, %rhs
  %smin = select <8 x i1> %ge, <8 x i32> %rhs, <8 x i32> %lhs
  store <8 x i32> %smin, <8 x i32>* %a.vec, align 2
  ; Advance by the 8 lanes just processed; leave once index 16384 is reached.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test92:
; AVX2: vpminsd
}
2685
; test93: per-lane unsigned maximum of two i32 arrays, written back through %a.
; Each trip handles one <8 x i32> chunk; the compare is "a ult b" and the
; select returns b when it holds, so the larger (unsigned) lane survives.
define void @test93(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Vector view of the current chunk of %a (also the store destination).
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <8 x i32>*
  ; Vector view of the matching chunk of %b.
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <8 x i32>*
  %lhs = load <8 x i32>* %a.vec, align 2
  %rhs = load <8 x i32>* %b.vec, align 2
  ; lhs < rhs (unsigned) ? rhs : lhs
  %lt = icmp ult <8 x i32> %lhs, %rhs
  %umax = select <8 x i1> %lt, <8 x i32> %rhs, <8 x i32> %lhs
  store <8 x i32> %umax, <8 x i32>* %a.vec, align 2
  ; Advance by the 8 lanes just processed; leave once index 16384 is reached.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test93:
; AVX2: vpmaxud
}
2711
; test94: per-lane unsigned maximum of two i32 arrays, written back through %a.
; Each trip handles one <8 x i32> chunk; the compare is "a ule b" and the
; select returns b when it holds, so the larger (unsigned) lane survives.
define void @test94(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Vector view of the current chunk of %a (also the store destination).
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <8 x i32>*
  ; Vector view of the matching chunk of %b.
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <8 x i32>*
  %lhs = load <8 x i32>* %a.vec, align 2
  %rhs = load <8 x i32>* %b.vec, align 2
  ; lhs <= rhs (unsigned) ? rhs : lhs
  %le = icmp ule <8 x i32> %lhs, %rhs
  %umax = select <8 x i1> %le, <8 x i32> %rhs, <8 x i32> %lhs
  store <8 x i32> %umax, <8 x i32>* %a.vec, align 2
  ; Advance by the 8 lanes just processed; leave once index 16384 is reached.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test94:
; AVX2: vpmaxud
}
2737
; test95: per-lane unsigned minimum of two i32 arrays, written back through %a.
; Each trip handles one <8 x i32> chunk; the compare is "a ugt b" and the
; select returns b when it holds, so the smaller (unsigned) lane survives.
define void @test95(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Vector view of the current chunk of %a (also the store destination).
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <8 x i32>*
  ; Vector view of the matching chunk of %b.
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <8 x i32>*
  %lhs = load <8 x i32>* %a.vec, align 2
  %rhs = load <8 x i32>* %b.vec, align 2
  ; lhs > rhs (unsigned) ? rhs : lhs
  %gt = icmp ugt <8 x i32> %lhs, %rhs
  %umin = select <8 x i1> %gt, <8 x i32> %rhs, <8 x i32> %lhs
  store <8 x i32> %umin, <8 x i32>* %a.vec, align 2
  ; Advance by the 8 lanes just processed; leave once index 16384 is reached.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test95:
; AVX2: vpminud
}
2763
; test96: per-lane unsigned minimum of two i32 arrays, written back through %a.
; Each trip handles one <8 x i32> chunk; the compare is "a uge b" and the
; select returns b when it holds, so the smaller (unsigned) lane survives.
define void @test96(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; Vector view of the current chunk of %a (also the store destination).
  %a.elt = getelementptr inbounds i32* %a, i64 %i
  %a.vec = bitcast i32* %a.elt to <8 x i32>*
  ; Vector view of the matching chunk of %b.
  %b.elt = getelementptr inbounds i32* %b, i64 %i
  %b.vec = bitcast i32* %b.elt to <8 x i32>*
  %lhs = load <8 x i32>* %a.vec, align 2
  %rhs = load <8 x i32>* %b.vec, align 2
  ; lhs >= rhs (unsigned) ? rhs : lhs
  %ge = icmp uge <8 x i32> %lhs, %rhs
  %umin = select <8 x i1> %ge, <8 x i32> %rhs, <8 x i32> %lhs
  store <8 x i32> %umin, <8 x i32>* %a.vec, align 2
  ; Advance by the 8 lanes just processed; leave once index 16384 is reached.
  %i.next = add i64 %i, 8
  %done = icmp eq i64 %i.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test96:
; AVX2: vpminud
}
2789