; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
; RUN: llc -march=x86-64 -mcpu=core-avx2 -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2

; Every function below has the same shape: a counted loop that loads one
; vector from %a and one from %b, compares them lane by lane, keeps the %a
; lane where the compare is true and the %b lane otherwise, and stores the
; result back over the %a vector.  Only the element type, the vector width and
; the icmp predicate change from one function to the next.
;
; The FileCheck directives at the end of each body name the packed min/max
; mnemonic that must appear in the llc output for that function.  The four
; prefixes correspond to the four llc invocations above (core2, corei7,
; corei7-avx and core-avx2 respectively).

; @test1 - <16 x i8>, select(a slt b, a, b): signed minimum.
define void @test1(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vptr.b = bitcast i8* %addr.b to <16 x i8>*
  %va = load <16 x i8>* %vptr.a, align 2
  %vb = load <16 x i8>* %vptr.b, align 2
  %mask = icmp slt <16 x i8> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i8> %va, <16 x i8> %vb
  store <16 x i8> %pick, <16 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test1:
; SSE4: pminsb

; AVX1-LABEL: test1:
; AVX1: vpminsb

; AVX2-LABEL: test1:
; AVX2: vpminsb
}

; @test2 - <16 x i8>, select(a sle b, a, b): signed minimum.
define void @test2(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vptr.b = bitcast i8* %addr.b to <16 x i8>*
  %va = load <16 x i8>* %vptr.a, align 2
  %vb = load <16 x i8>* %vptr.b, align 2
  %mask = icmp sle <16 x i8> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i8> %va, <16 x i8> %vb
  store <16 x i8> %pick, <16 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test2:
; SSE4: pminsb

; AVX1-LABEL: test2:
; AVX1: vpminsb

; AVX2-LABEL: test2:
; AVX2: vpminsb
}

; @test3 - <16 x i8>, select(a sgt b, a, b): signed maximum.
define void @test3(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vptr.b = bitcast i8* %addr.b to <16 x i8>*
  %va = load <16 x i8>* %vptr.a, align 2
  %vb = load <16 x i8>* %vptr.b, align 2
  %mask = icmp sgt <16 x i8> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i8> %va, <16 x i8> %vb
  store <16 x i8> %pick, <16 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test3:
; SSE4: pmaxsb

; AVX1-LABEL: test3:
; AVX1: vpmaxsb

; AVX2-LABEL: test3:
; AVX2: vpmaxsb
}

; @test4 - <16 x i8>, select(a sge b, a, b): signed maximum.
define void @test4(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vptr.b = bitcast i8* %addr.b to <16 x i8>*
  %va = load <16 x i8>* %vptr.a, align 2
  %vb = load <16 x i8>* %vptr.b, align 2
  %mask = icmp sge <16 x i8> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i8> %va, <16 x i8> %vb
  store <16 x i8> %pick, <16 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test4:
; SSE4: pmaxsb

; AVX1-LABEL: test4:
; AVX1: vpmaxsb

; AVX2-LABEL: test4:
; AVX2: vpmaxsb
}

; @test5 - <16 x i8>, select(a ult b, a, b): unsigned minimum.
define void @test5(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vptr.b = bitcast i8* %addr.b to <16 x i8>*
  %va = load <16 x i8>* %vptr.a, align 2
  %vb = load <16 x i8>* %vptr.b, align 2
  %mask = icmp ult <16 x i8> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i8> %va, <16 x i8> %vb
  store <16 x i8> %pick, <16 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test5:
; SSE2: pminub

; AVX1-LABEL: test5:
; AVX1: vpminub

; AVX2-LABEL: test5:
; AVX2: vpminub
}

; @test6 - <16 x i8>, select(a ule b, a, b): unsigned minimum.
define void @test6(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vptr.b = bitcast i8* %addr.b to <16 x i8>*
  %va = load <16 x i8>* %vptr.a, align 2
  %vb = load <16 x i8>* %vptr.b, align 2
  %mask = icmp ule <16 x i8> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i8> %va, <16 x i8> %vb
  store <16 x i8> %pick, <16 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test6:
; SSE2: pminub

; AVX1-LABEL: test6:
; AVX1: vpminub

; AVX2-LABEL: test6:
; AVX2: vpminub
}

; @test7 - <16 x i8>, select(a ugt b, a, b): unsigned maximum.
define void @test7(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vptr.b = bitcast i8* %addr.b to <16 x i8>*
  %va = load <16 x i8>* %vptr.a, align 2
  %vb = load <16 x i8>* %vptr.b, align 2
  %mask = icmp ugt <16 x i8> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i8> %va, <16 x i8> %vb
  store <16 x i8> %pick, <16 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test7:
; SSE2: pmaxub

; AVX1-LABEL: test7:
; AVX1: vpmaxub

; AVX2-LABEL: test7:
; AVX2: vpmaxub
}

; @test8 - <16 x i8>, select(a uge b, a, b): unsigned maximum.
define void @test8(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <16 x i8>*
  %vptr.b = bitcast i8* %addr.b to <16 x i8>*
  %va = load <16 x i8>* %vptr.a, align 2
  %vb = load <16 x i8>* %vptr.b, align 2
  %mask = icmp uge <16 x i8> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i8> %va, <16 x i8> %vb
  store <16 x i8> %pick, <16 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test8:
; SSE2: pmaxub

; AVX1-LABEL: test8:
; AVX1: vpmaxub

; AVX2-LABEL: test8:
; AVX2: vpmaxub
}

; @test9 - <8 x i16>, select(a slt b, a, b): signed minimum.
define void @test9(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <8 x i16>*
  %vptr.b = bitcast i16* %addr.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  %mask = icmp slt <8 x i16> %va, %vb
  %pick = select <8 x i1> %mask, <8 x i16> %va, <8 x i16> %vb
  store <8 x i16> %pick, <8 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test9:
; SSE2: pminsw

; AVX1-LABEL: test9:
; AVX1: vpminsw

; AVX2-LABEL: test9:
; AVX2: vpminsw
}

; @test10 - <8 x i16>, select(a sle b, a, b): signed minimum.
define void @test10(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <8 x i16>*
  %vptr.b = bitcast i16* %addr.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  %mask = icmp sle <8 x i16> %va, %vb
  %pick = select <8 x i1> %mask, <8 x i16> %va, <8 x i16> %vb
  store <8 x i16> %pick, <8 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test10:
; SSE2: pminsw

; AVX1-LABEL: test10:
; AVX1: vpminsw

; AVX2-LABEL: test10:
; AVX2: vpminsw
}

; @test11 - <8 x i16>, select(a sgt b, a, b): signed maximum.
define void @test11(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <8 x i16>*
  %vptr.b = bitcast i16* %addr.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  %mask = icmp sgt <8 x i16> %va, %vb
  %pick = select <8 x i1> %mask, <8 x i16> %va, <8 x i16> %vb
  store <8 x i16> %pick, <8 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test11:
; SSE2: pmaxsw

; AVX1-LABEL: test11:
; AVX1: vpmaxsw

; AVX2-LABEL: test11:
; AVX2: vpmaxsw
}

; @test12 - <8 x i16>, select(a sge b, a, b): signed maximum.
define void @test12(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <8 x i16>*
  %vptr.b = bitcast i16* %addr.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  %mask = icmp sge <8 x i16> %va, %vb
  %pick = select <8 x i1> %mask, <8 x i16> %va, <8 x i16> %vb
  store <8 x i16> %pick, <8 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE2-LABEL: test12:
; SSE2: pmaxsw

; AVX1-LABEL: test12:
; AVX1: vpmaxsw

; AVX2-LABEL: test12:
; AVX2: vpmaxsw
}

; @test13 - <8 x i16>, select(a ult b, a, b): unsigned minimum.
define void @test13(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <8 x i16>*
  %vptr.b = bitcast i16* %addr.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  %mask = icmp ult <8 x i16> %va, %vb
  %pick = select <8 x i1> %mask, <8 x i16> %va, <8 x i16> %vb
  store <8 x i16> %pick, <8 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test13:
; SSE4: pminuw

; AVX1-LABEL: test13:
; AVX1: vpminuw

; AVX2-LABEL: test13:
; AVX2: vpminuw
}

; @test14 - <8 x i16>, select(a ule b, a, b): unsigned minimum.
define void @test14(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <8 x i16>*
  %vptr.b = bitcast i16* %addr.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  %mask = icmp ule <8 x i16> %va, %vb
  %pick = select <8 x i1> %mask, <8 x i16> %va, <8 x i16> %vb
  store <8 x i16> %pick, <8 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test14:
; SSE4: pminuw

; AVX1-LABEL: test14:
; AVX1: vpminuw

; AVX2-LABEL: test14:
; AVX2: vpminuw
}

; @test15 - <8 x i16>, select(a ugt b, a, b): unsigned maximum.
define void @test15(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <8 x i16>*
  %vptr.b = bitcast i16* %addr.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  %mask = icmp ugt <8 x i16> %va, %vb
  %pick = select <8 x i1> %mask, <8 x i16> %va, <8 x i16> %vb
  store <8 x i16> %pick, <8 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test15:
; SSE4: pmaxuw

; AVX1-LABEL: test15:
; AVX1: vpmaxuw

; AVX2-LABEL: test15:
; AVX2: vpmaxuw
}

; @test16 - <8 x i16>, select(a uge b, a, b): unsigned maximum.
define void @test16(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <8 x i16>*
  %vptr.b = bitcast i16* %addr.b to <8 x i16>*
  %va = load <8 x i16>* %vptr.a, align 2
  %vb = load <8 x i16>* %vptr.b, align 2
  %mask = icmp uge <8 x i16> %va, %vb
  %pick = select <8 x i1> %mask, <8 x i16> %va, <8 x i16> %vb
  store <8 x i16> %pick, <8 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 8
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test16:
; SSE4: pmaxuw

; AVX1-LABEL: test16:
; AVX1: vpmaxuw

; AVX2-LABEL: test16:
; AVX2: vpmaxuw
}

; @test17 - <4 x i32>, select(a slt b, a, b): signed minimum.
define void @test17(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vptr.a = bitcast i32* %addr.a to <4 x i32>*
  %vptr.b = bitcast i32* %addr.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  %mask = icmp slt <4 x i32> %va, %vb
  %pick = select <4 x i1> %mask, <4 x i32> %va, <4 x i32> %vb
  store <4 x i32> %pick, <4 x i32>* %vptr.a, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test17:
; SSE4: pminsd

; AVX1-LABEL: test17:
; AVX1: vpminsd

; AVX2-LABEL: test17:
; AVX2: vpminsd
}

; @test18 - <4 x i32>, select(a sle b, a, b): signed minimum.
define void @test18(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vptr.a = bitcast i32* %addr.a to <4 x i32>*
  %vptr.b = bitcast i32* %addr.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  %mask = icmp sle <4 x i32> %va, %vb
  %pick = select <4 x i1> %mask, <4 x i32> %va, <4 x i32> %vb
  store <4 x i32> %pick, <4 x i32>* %vptr.a, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test18:
; SSE4: pminsd

; AVX1-LABEL: test18:
; AVX1: vpminsd

; AVX2-LABEL: test18:
; AVX2: vpminsd
}

; @test19 - <4 x i32>, select(a sgt b, a, b): signed maximum.
define void @test19(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vptr.a = bitcast i32* %addr.a to <4 x i32>*
  %vptr.b = bitcast i32* %addr.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  %mask = icmp sgt <4 x i32> %va, %vb
  %pick = select <4 x i1> %mask, <4 x i32> %va, <4 x i32> %vb
  store <4 x i32> %pick, <4 x i32>* %vptr.a, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test19:
; SSE4: pmaxsd

; AVX1-LABEL: test19:
; AVX1: vpmaxsd

; AVX2-LABEL: test19:
; AVX2: vpmaxsd
}

; @test20 - <4 x i32>, select(a sge b, a, b): signed maximum.
define void @test20(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vptr.a = bitcast i32* %addr.a to <4 x i32>*
  %vptr.b = bitcast i32* %addr.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  %mask = icmp sge <4 x i32> %va, %vb
  %pick = select <4 x i1> %mask, <4 x i32> %va, <4 x i32> %vb
  store <4 x i32> %pick, <4 x i32>* %vptr.a, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test20:
; SSE4: pmaxsd

; AVX1-LABEL: test20:
; AVX1: vpmaxsd

; AVX2-LABEL: test20:
; AVX2: vpmaxsd
}

; @test21 - <4 x i32>, select(a ult b, a, b): unsigned minimum.
define void @test21(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vptr.a = bitcast i32* %addr.a to <4 x i32>*
  %vptr.b = bitcast i32* %addr.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  %mask = icmp ult <4 x i32> %va, %vb
  %pick = select <4 x i1> %mask, <4 x i32> %va, <4 x i32> %vb
  store <4 x i32> %pick, <4 x i32>* %vptr.a, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test21:
; SSE4: pminud

; AVX1-LABEL: test21:
; AVX1: vpminud

; AVX2-LABEL: test21:
; AVX2: vpminud
}

; @test22 - <4 x i32>, select(a ule b, a, b): unsigned minimum.
define void @test22(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vptr.a = bitcast i32* %addr.a to <4 x i32>*
  %vptr.b = bitcast i32* %addr.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  %mask = icmp ule <4 x i32> %va, %vb
  %pick = select <4 x i1> %mask, <4 x i32> %va, <4 x i32> %vb
  store <4 x i32> %pick, <4 x i32>* %vptr.a, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test22:
; SSE4: pminud

; AVX1-LABEL: test22:
; AVX1: vpminud

; AVX2-LABEL: test22:
; AVX2: vpminud
}

; @test23 - <4 x i32>, select(a ugt b, a, b): unsigned maximum.
define void @test23(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vptr.a = bitcast i32* %addr.a to <4 x i32>*
  %vptr.b = bitcast i32* %addr.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  %mask = icmp ugt <4 x i32> %va, %vb
  %pick = select <4 x i1> %mask, <4 x i32> %va, <4 x i32> %vb
  store <4 x i32> %pick, <4 x i32>* %vptr.a, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test23:
; SSE4: pmaxud

; AVX1-LABEL: test23:
; AVX1: vpmaxud

; AVX2-LABEL: test23:
; AVX2: vpmaxud
}

; @test24 - <4 x i32>, select(a uge b, a, b): unsigned maximum.
define void @test24(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i32* %a, i64 %iv
  %addr.b = getelementptr inbounds i32* %b, i64 %iv
  %vptr.a = bitcast i32* %addr.a to <4 x i32>*
  %vptr.b = bitcast i32* %addr.b to <4 x i32>*
  %va = load <4 x i32>* %vptr.a, align 2
  %vb = load <4 x i32>* %vptr.b, align 2
  %mask = icmp uge <4 x i32> %va, %vb
  %pick = select <4 x i1> %mask, <4 x i32> %va, <4 x i32> %vb
  store <4 x i32> %pick, <4 x i32>* %vptr.a, align 2
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; SSE4-LABEL: test24:
; SSE4: pmaxud

; AVX1-LABEL: test24:
; AVX1: vpmaxud

; AVX2-LABEL: test24:
; AVX2: vpmaxud
}

; @test25 - <32 x i8>, select(a slt b, a, b): signed minimum.
; From here on only the AVX2 run has directives.
define void @test25(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <32 x i8>*
  %vptr.b = bitcast i8* %addr.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  %mask = icmp slt <32 x i8> %va, %vb
  %pick = select <32 x i1> %mask, <32 x i8> %va, <32 x i8> %vb
  store <32 x i8> %pick, <32 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test25:
; AVX2: vpminsb
}

; @test26 - <32 x i8>, select(a sle b, a, b): signed minimum.
define void @test26(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <32 x i8>*
  %vptr.b = bitcast i8* %addr.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  %mask = icmp sle <32 x i8> %va, %vb
  %pick = select <32 x i1> %mask, <32 x i8> %va, <32 x i8> %vb
  store <32 x i8> %pick, <32 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test26:
; AVX2: vpminsb
}

; @test27 - <32 x i8>, select(a sgt b, a, b): signed maximum.
define void @test27(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <32 x i8>*
  %vptr.b = bitcast i8* %addr.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  %mask = icmp sgt <32 x i8> %va, %vb
  %pick = select <32 x i1> %mask, <32 x i8> %va, <32 x i8> %vb
  store <32 x i8> %pick, <32 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test27:
; AVX2: vpmaxsb
}

; @test28 - <32 x i8>, select(a sge b, a, b): signed maximum.
define void @test28(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <32 x i8>*
  %vptr.b = bitcast i8* %addr.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  %mask = icmp sge <32 x i8> %va, %vb
  %pick = select <32 x i1> %mask, <32 x i8> %va, <32 x i8> %vb
  store <32 x i8> %pick, <32 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test28:
; AVX2: vpmaxsb
}

; @test29 - <32 x i8>, select(a ult b, a, b): unsigned minimum.
define void @test29(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <32 x i8>*
  %vptr.b = bitcast i8* %addr.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  %mask = icmp ult <32 x i8> %va, %vb
  %pick = select <32 x i1> %mask, <32 x i8> %va, <32 x i8> %vb
  store <32 x i8> %pick, <32 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test29:
; AVX2: vpminub
}

; @test30 - <32 x i8>, select(a ule b, a, b): unsigned minimum.
define void @test30(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <32 x i8>*
  %vptr.b = bitcast i8* %addr.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  %mask = icmp ule <32 x i8> %va, %vb
  %pick = select <32 x i1> %mask, <32 x i8> %va, <32 x i8> %vb
  store <32 x i8> %pick, <32 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test30:
; AVX2: vpminub
}

; @test31 - <32 x i8>, select(a ugt b, a, b): unsigned maximum.
define void @test31(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <32 x i8>*
  %vptr.b = bitcast i8* %addr.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  %mask = icmp ugt <32 x i8> %va, %vb
  %pick = select <32 x i1> %mask, <32 x i8> %va, <32 x i8> %vb
  store <32 x i8> %pick, <32 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test31:
; AVX2: vpmaxub
}

; @test32 - <32 x i8>, select(a uge b, a, b): unsigned maximum.
define void @test32(i8* nocapture %a, i8* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i8* %a, i64 %iv
  %addr.b = getelementptr inbounds i8* %b, i64 %iv
  %vptr.a = bitcast i8* %addr.a to <32 x i8>*
  %vptr.b = bitcast i8* %addr.b to <32 x i8>*
  %va = load <32 x i8>* %vptr.a, align 2
  %vb = load <32 x i8>* %vptr.b, align 2
  %mask = icmp uge <32 x i8> %va, %vb
  %pick = select <32 x i1> %mask, <32 x i8> %va, <32 x i8> %vb
  store <32 x i8> %pick, <32 x i8>* %vptr.a, align 2
  %iv.next = add i64 %iv, 32
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test32:
; AVX2: vpmaxub
}

; @test33 - <16 x i16>, select(a slt b, a, b): signed minimum.
define void @test33(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vptr.b = bitcast i16* %addr.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  %mask = icmp slt <16 x i16> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i16> %va, <16 x i16> %vb
  store <16 x i16> %pick, <16 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test33:
; AVX2: vpminsw
}

; @test34 - <16 x i16>, select(a sle b, a, b): signed minimum.
define void @test34(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vptr.b = bitcast i16* %addr.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  %mask = icmp sle <16 x i16> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i16> %va, <16 x i16> %vb
  store <16 x i16> %pick, <16 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test34:
; AVX2: vpminsw
}

; @test35 - <16 x i16>, select(a sgt b, a, b): signed maximum.
define void @test35(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vptr.b = bitcast i16* %addr.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  %mask = icmp sgt <16 x i16> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i16> %va, <16 x i16> %vb
  store <16 x i16> %pick, <16 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test35:
; AVX2: vpmaxsw
}

; @test36 - <16 x i16>, select(a sge b, a, b): signed maximum.
define void @test36(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vptr.b = bitcast i16* %addr.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  %mask = icmp sge <16 x i16> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i16> %va, <16 x i16> %vb
  store <16 x i16> %pick, <16 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test36:
; AVX2: vpmaxsw
}

; @test37 - <16 x i16>, select(a ult b, a, b): unsigned minimum.
define void @test37(i16* nocapture %a, i16* nocapture %b) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %addr.a = getelementptr inbounds i16* %a, i64 %iv
  %addr.b = getelementptr inbounds i16* %b, i64 %iv
  %vptr.a = bitcast i16* %addr.a to <16 x i16>*
  %vptr.b = bitcast i16* %addr.b to <16 x i16>*
  %va = load <16 x i16>* %vptr.a, align 2
  %vb = load <16 x i16>* %vptr.b, align 2
  %mask = icmp ult <16 x i16> %va, %vb
  %pick = select <16 x i1> %mask, <16 x i16> %va, <16 x i16> %vb
  store <16 x i16> %pick, <16 x i16>* %vptr.a, align 2
  %iv.next = add i64 %iv, 16
  %done = icmp eq i64 %iv.next, 16384
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void

; AVX2-LABEL: test37:
; AVX2: vpminuw
}

define void @test38(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>* 1121 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 1122 %load.a = load <16 x i16>* %ptr.a, align 2 1123 %load.b = load <16 x i16>* %ptr.b, align 2 1124 %cmp = icmp ule <16 x i16> %load.a, %load.b 1125 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b 1126 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 1127 %index.next = add i64 %index, 16 1128 %loop = icmp eq i64 %index.next, 16384 1129 br i1 %loop, label %for.end, label %vector.body 1130 1131for.end: ; preds = %vector.body 1132 ret void 1133 1134; AVX2-LABEL: test38: 1135; AVX2: vpminuw 1136} 1137 1138define void @test39(i16* nocapture %a, i16* nocapture %b) nounwind { 1139vector.ph: 1140 br label %vector.body 1141 1142vector.body: ; preds = %vector.body, %vector.ph 1143 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1144 %gep.a = getelementptr inbounds i16* %a, i64 %index 1145 %gep.b = getelementptr inbounds i16* %b, i64 %index 1146 %ptr.a = bitcast i16* %gep.a to <16 x i16>* 1147 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 1148 %load.a = load <16 x i16>* %ptr.a, align 2 1149 %load.b = load <16 x i16>* %ptr.b, align 2 1150 %cmp = icmp ugt <16 x i16> %load.a, %load.b 1151 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b 1152 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 1153 %index.next = add i64 %index, 16 1154 %loop = icmp eq i64 %index.next, 16384 1155 br i1 %loop, label %for.end, label %vector.body 1156 1157for.end: ; preds = %vector.body 1158 ret void 1159 1160; AVX2-LABEL: test39: 1161; AVX2: vpmaxuw 1162} 1163 1164define void @test40(i16* nocapture %a, i16* nocapture %b) nounwind { 1165vector.ph: 1166 br label %vector.body 1167 1168vector.body: ; preds = %vector.body, %vector.ph 1169 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1170 %gep.a = getelementptr inbounds i16* %a, i64 %index 1171 %gep.b = getelementptr inbounds i16* %b, i64 %index 1172 %ptr.a = bitcast i16* %gep.a to <16 
x i16>* 1173 %ptr.b = bitcast i16* %gep.b to <16 x i16>* 1174 %load.a = load <16 x i16>* %ptr.a, align 2 1175 %load.b = load <16 x i16>* %ptr.b, align 2 1176 %cmp = icmp uge <16 x i16> %load.a, %load.b 1177 %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b 1178 store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2 1179 %index.next = add i64 %index, 16 1180 %loop = icmp eq i64 %index.next, 16384 1181 br i1 %loop, label %for.end, label %vector.body 1182 1183for.end: ; preds = %vector.body 1184 ret void 1185 1186; AVX2-LABEL: test40: 1187; AVX2: vpmaxuw 1188} 1189 1190define void @test41(i32* nocapture %a, i32* nocapture %b) nounwind { 1191vector.ph: 1192 br label %vector.body 1193 1194vector.body: ; preds = %vector.body, %vector.ph 1195 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1196 %gep.a = getelementptr inbounds i32* %a, i64 %index 1197 %gep.b = getelementptr inbounds i32* %b, i64 %index 1198 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1199 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1200 %load.a = load <8 x i32>* %ptr.a, align 2 1201 %load.b = load <8 x i32>* %ptr.b, align 2 1202 %cmp = icmp slt <8 x i32> %load.a, %load.b 1203 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1204 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1205 %index.next = add i64 %index, 8 1206 %loop = icmp eq i64 %index.next, 16384 1207 br i1 %loop, label %for.end, label %vector.body 1208 1209for.end: ; preds = %vector.body 1210 ret void 1211 1212; AVX2-LABEL: test41: 1213; AVX2: vpminsd 1214} 1215 1216define void @test42(i32* nocapture %a, i32* nocapture %b) nounwind { 1217vector.ph: 1218 br label %vector.body 1219 1220vector.body: ; preds = %vector.body, %vector.ph 1221 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1222 %gep.a = getelementptr inbounds i32* %a, i64 %index 1223 %gep.b = getelementptr inbounds i32* %b, i64 %index 1224 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1225 %ptr.b = bitcast i32* %gep.b to <8 
x i32>* 1226 %load.a = load <8 x i32>* %ptr.a, align 2 1227 %load.b = load <8 x i32>* %ptr.b, align 2 1228 %cmp = icmp sle <8 x i32> %load.a, %load.b 1229 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1230 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1231 %index.next = add i64 %index, 8 1232 %loop = icmp eq i64 %index.next, 16384 1233 br i1 %loop, label %for.end, label %vector.body 1234 1235for.end: ; preds = %vector.body 1236 ret void 1237 1238; AVX2-LABEL: test42: 1239; AVX2: vpminsd 1240} 1241 1242define void @test43(i32* nocapture %a, i32* nocapture %b) nounwind { 1243vector.ph: 1244 br label %vector.body 1245 1246vector.body: ; preds = %vector.body, %vector.ph 1247 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1248 %gep.a = getelementptr inbounds i32* %a, i64 %index 1249 %gep.b = getelementptr inbounds i32* %b, i64 %index 1250 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1251 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1252 %load.a = load <8 x i32>* %ptr.a, align 2 1253 %load.b = load <8 x i32>* %ptr.b, align 2 1254 %cmp = icmp sgt <8 x i32> %load.a, %load.b 1255 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1256 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1257 %index.next = add i64 %index, 8 1258 %loop = icmp eq i64 %index.next, 16384 1259 br i1 %loop, label %for.end, label %vector.body 1260 1261for.end: ; preds = %vector.body 1262 ret void 1263 1264; AVX2-LABEL: test43: 1265; AVX2: vpmaxsd 1266} 1267 1268define void @test44(i32* nocapture %a, i32* nocapture %b) nounwind { 1269vector.ph: 1270 br label %vector.body 1271 1272vector.body: ; preds = %vector.body, %vector.ph 1273 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1274 %gep.a = getelementptr inbounds i32* %a, i64 %index 1275 %gep.b = getelementptr inbounds i32* %b, i64 %index 1276 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1277 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1278 %load.a = load <8 x i32>* %ptr.a, align 2 
1279 %load.b = load <8 x i32>* %ptr.b, align 2 1280 %cmp = icmp sge <8 x i32> %load.a, %load.b 1281 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1282 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1283 %index.next = add i64 %index, 8 1284 %loop = icmp eq i64 %index.next, 16384 1285 br i1 %loop, label %for.end, label %vector.body 1286 1287for.end: ; preds = %vector.body 1288 ret void 1289 1290; AVX2-LABEL: test44: 1291; AVX2: vpmaxsd 1292} 1293 1294define void @test45(i32* nocapture %a, i32* nocapture %b) nounwind { 1295vector.ph: 1296 br label %vector.body 1297 1298vector.body: ; preds = %vector.body, %vector.ph 1299 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1300 %gep.a = getelementptr inbounds i32* %a, i64 %index 1301 %gep.b = getelementptr inbounds i32* %b, i64 %index 1302 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1303 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1304 %load.a = load <8 x i32>* %ptr.a, align 2 1305 %load.b = load <8 x i32>* %ptr.b, align 2 1306 %cmp = icmp ult <8 x i32> %load.a, %load.b 1307 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1308 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1309 %index.next = add i64 %index, 8 1310 %loop = icmp eq i64 %index.next, 16384 1311 br i1 %loop, label %for.end, label %vector.body 1312 1313for.end: ; preds = %vector.body 1314 ret void 1315 1316; AVX2-LABEL: test45: 1317; AVX2: vpminud 1318} 1319 1320define void @test46(i32* nocapture %a, i32* nocapture %b) nounwind { 1321vector.ph: 1322 br label %vector.body 1323 1324vector.body: ; preds = %vector.body, %vector.ph 1325 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1326 %gep.a = getelementptr inbounds i32* %a, i64 %index 1327 %gep.b = getelementptr inbounds i32* %b, i64 %index 1328 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1329 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1330 %load.a = load <8 x i32>* %ptr.a, align 2 1331 %load.b = load <8 x i32>* %ptr.b, align 2 1332 %cmp 
= icmp ule <8 x i32> %load.a, %load.b 1333 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1334 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1335 %index.next = add i64 %index, 8 1336 %loop = icmp eq i64 %index.next, 16384 1337 br i1 %loop, label %for.end, label %vector.body 1338 1339for.end: ; preds = %vector.body 1340 ret void 1341 1342; AVX2-LABEL: test46: 1343; AVX2: vpminud 1344} 1345 1346define void @test47(i32* nocapture %a, i32* nocapture %b) nounwind { 1347vector.ph: 1348 br label %vector.body 1349 1350vector.body: ; preds = %vector.body, %vector.ph 1351 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1352 %gep.a = getelementptr inbounds i32* %a, i64 %index 1353 %gep.b = getelementptr inbounds i32* %b, i64 %index 1354 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1355 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1356 %load.a = load <8 x i32>* %ptr.a, align 2 1357 %load.b = load <8 x i32>* %ptr.b, align 2 1358 %cmp = icmp ugt <8 x i32> %load.a, %load.b 1359 %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1360 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1361 %index.next = add i64 %index, 8 1362 %loop = icmp eq i64 %index.next, 16384 1363 br i1 %loop, label %for.end, label %vector.body 1364 1365for.end: ; preds = %vector.body 1366 ret void 1367 1368; AVX2-LABEL: test47: 1369; AVX2: vpmaxud 1370} 1371 1372define void @test48(i32* nocapture %a, i32* nocapture %b) nounwind { 1373vector.ph: 1374 br label %vector.body 1375 1376vector.body: ; preds = %vector.body, %vector.ph 1377 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1378 %gep.a = getelementptr inbounds i32* %a, i64 %index 1379 %gep.b = getelementptr inbounds i32* %b, i64 %index 1380 %ptr.a = bitcast i32* %gep.a to <8 x i32>* 1381 %ptr.b = bitcast i32* %gep.b to <8 x i32>* 1382 %load.a = load <8 x i32>* %ptr.a, align 2 1383 %load.b = load <8 x i32>* %ptr.b, align 2 1384 %cmp = icmp uge <8 x i32> %load.a, %load.b 1385 %sel = select 
<8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b 1386 store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2 1387 %index.next = add i64 %index, 8 1388 %loop = icmp eq i64 %index.next, 16384 1389 br i1 %loop, label %for.end, label %vector.body 1390 1391for.end: ; preds = %vector.body 1392 ret void 1393 1394; AVX2-LABEL: test48: 1395; AVX2: vpmaxud 1396} 1397 1398define void @test49(i8* nocapture %a, i8* nocapture %b) nounwind { 1399vector.ph: 1400 br label %vector.body 1401 1402vector.body: ; preds = %vector.body, %vector.ph 1403 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1404 %gep.a = getelementptr inbounds i8* %a, i64 %index 1405 %gep.b = getelementptr inbounds i8* %b, i64 %index 1406 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1407 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1408 %load.a = load <16 x i8>* %ptr.a, align 2 1409 %load.b = load <16 x i8>* %ptr.b, align 2 1410 %cmp = icmp slt <16 x i8> %load.a, %load.b 1411 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1412 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1413 %index.next = add i64 %index, 16 1414 %loop = icmp eq i64 %index.next, 16384 1415 br i1 %loop, label %for.end, label %vector.body 1416 1417for.end: ; preds = %vector.body 1418 ret void 1419 1420; SSE4-LABEL: test49: 1421; SSE4: pmaxsb 1422 1423; AVX1-LABEL: test49: 1424; AVX1: vpmaxsb 1425 1426; AVX2-LABEL: test49: 1427; AVX2: vpmaxsb 1428} 1429 1430define void @test50(i8* nocapture %a, i8* nocapture %b) nounwind { 1431vector.ph: 1432 br label %vector.body 1433 1434vector.body: ; preds = %vector.body, %vector.ph 1435 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1436 %gep.a = getelementptr inbounds i8* %a, i64 %index 1437 %gep.b = getelementptr inbounds i8* %b, i64 %index 1438 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1439 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1440 %load.a = load <16 x i8>* %ptr.a, align 2 1441 %load.b = load <16 x i8>* %ptr.b, align 2 1442 %cmp = icmp sle <16 x i8> 
%load.a, %load.b 1443 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1444 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1445 %index.next = add i64 %index, 16 1446 %loop = icmp eq i64 %index.next, 16384 1447 br i1 %loop, label %for.end, label %vector.body 1448 1449for.end: ; preds = %vector.body 1450 ret void 1451 1452; SSE4-LABEL: test50: 1453; SSE4: pmaxsb 1454 1455; AVX1-LABEL: test50: 1456; AVX1: vpmaxsb 1457 1458; AVX2-LABEL: test50: 1459; AVX2: vpmaxsb 1460} 1461 1462define void @test51(i8* nocapture %a, i8* nocapture %b) nounwind { 1463vector.ph: 1464 br label %vector.body 1465 1466vector.body: ; preds = %vector.body, %vector.ph 1467 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1468 %gep.a = getelementptr inbounds i8* %a, i64 %index 1469 %gep.b = getelementptr inbounds i8* %b, i64 %index 1470 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1471 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1472 %load.a = load <16 x i8>* %ptr.a, align 2 1473 %load.b = load <16 x i8>* %ptr.b, align 2 1474 %cmp = icmp sgt <16 x i8> %load.a, %load.b 1475 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1476 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1477 %index.next = add i64 %index, 16 1478 %loop = icmp eq i64 %index.next, 16384 1479 br i1 %loop, label %for.end, label %vector.body 1480 1481for.end: ; preds = %vector.body 1482 ret void 1483 1484; SSE4-LABEL: test51: 1485; SSE4: pminsb 1486 1487; AVX1-LABEL: test51: 1488; AVX1: vpminsb 1489 1490; AVX2-LABEL: test51: 1491; AVX2: vpminsb 1492} 1493 1494define void @test52(i8* nocapture %a, i8* nocapture %b) nounwind { 1495vector.ph: 1496 br label %vector.body 1497 1498vector.body: ; preds = %vector.body, %vector.ph 1499 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1500 %gep.a = getelementptr inbounds i8* %a, i64 %index 1501 %gep.b = getelementptr inbounds i8* %b, i64 %index 1502 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1503 %ptr.b = bitcast i8* %gep.b to 
<16 x i8>* 1504 %load.a = load <16 x i8>* %ptr.a, align 2 1505 %load.b = load <16 x i8>* %ptr.b, align 2 1506 %cmp = icmp sge <16 x i8> %load.a, %load.b 1507 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1508 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1509 %index.next = add i64 %index, 16 1510 %loop = icmp eq i64 %index.next, 16384 1511 br i1 %loop, label %for.end, label %vector.body 1512 1513for.end: ; preds = %vector.body 1514 ret void 1515 1516; SSE4-LABEL: test52: 1517; SSE4: pminsb 1518 1519; AVX1-LABEL: test52: 1520; AVX1: vpminsb 1521 1522; AVX2-LABEL: test52: 1523; AVX2: vpminsb 1524} 1525 1526define void @test53(i8* nocapture %a, i8* nocapture %b) nounwind { 1527vector.ph: 1528 br label %vector.body 1529 1530vector.body: ; preds = %vector.body, %vector.ph 1531 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1532 %gep.a = getelementptr inbounds i8* %a, i64 %index 1533 %gep.b = getelementptr inbounds i8* %b, i64 %index 1534 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1535 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1536 %load.a = load <16 x i8>* %ptr.a, align 2 1537 %load.b = load <16 x i8>* %ptr.b, align 2 1538 %cmp = icmp ult <16 x i8> %load.a, %load.b 1539 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1540 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1541 %index.next = add i64 %index, 16 1542 %loop = icmp eq i64 %index.next, 16384 1543 br i1 %loop, label %for.end, label %vector.body 1544 1545for.end: ; preds = %vector.body 1546 ret void 1547 1548; SSE2-LABEL: test53: 1549; SSE2: pmaxub 1550 1551; AVX1-LABEL: test53: 1552; AVX1: vpmaxub 1553 1554; AVX2-LABEL: test53: 1555; AVX2: vpmaxub 1556} 1557 1558define void @test54(i8* nocapture %a, i8* nocapture %b) nounwind { 1559vector.ph: 1560 br label %vector.body 1561 1562vector.body: ; preds = %vector.body, %vector.ph 1563 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1564 %gep.a = getelementptr inbounds i8* %a, i64 %index 
1565 %gep.b = getelementptr inbounds i8* %b, i64 %index 1566 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1567 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1568 %load.a = load <16 x i8>* %ptr.a, align 2 1569 %load.b = load <16 x i8>* %ptr.b, align 2 1570 %cmp = icmp ule <16 x i8> %load.a, %load.b 1571 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1572 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1573 %index.next = add i64 %index, 16 1574 %loop = icmp eq i64 %index.next, 16384 1575 br i1 %loop, label %for.end, label %vector.body 1576 1577for.end: ; preds = %vector.body 1578 ret void 1579 1580; SSE2-LABEL: test54: 1581; SSE2: pmaxub 1582 1583; AVX1-LABEL: test54: 1584; AVX1: vpmaxub 1585 1586; AVX2-LABEL: test54: 1587; AVX2: vpmaxub 1588} 1589 1590define void @test55(i8* nocapture %a, i8* nocapture %b) nounwind { 1591vector.ph: 1592 br label %vector.body 1593 1594vector.body: ; preds = %vector.body, %vector.ph 1595 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1596 %gep.a = getelementptr inbounds i8* %a, i64 %index 1597 %gep.b = getelementptr inbounds i8* %b, i64 %index 1598 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1599 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1600 %load.a = load <16 x i8>* %ptr.a, align 2 1601 %load.b = load <16 x i8>* %ptr.b, align 2 1602 %cmp = icmp ugt <16 x i8> %load.a, %load.b 1603 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1604 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1605 %index.next = add i64 %index, 16 1606 %loop = icmp eq i64 %index.next, 16384 1607 br i1 %loop, label %for.end, label %vector.body 1608 1609for.end: ; preds = %vector.body 1610 ret void 1611 1612; SSE2-LABEL: test55: 1613; SSE2: pminub 1614 1615; AVX1-LABEL: test55: 1616; AVX1: vpminub 1617 1618; AVX2-LABEL: test55: 1619; AVX2: vpminub 1620} 1621 1622define void @test56(i8* nocapture %a, i8* nocapture %b) nounwind { 1623vector.ph: 1624 br label %vector.body 1625 1626vector.body: ; preds = %vector.body, 
%vector.ph 1627 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1628 %gep.a = getelementptr inbounds i8* %a, i64 %index 1629 %gep.b = getelementptr inbounds i8* %b, i64 %index 1630 %ptr.a = bitcast i8* %gep.a to <16 x i8>* 1631 %ptr.b = bitcast i8* %gep.b to <16 x i8>* 1632 %load.a = load <16 x i8>* %ptr.a, align 2 1633 %load.b = load <16 x i8>* %ptr.b, align 2 1634 %cmp = icmp uge <16 x i8> %load.a, %load.b 1635 %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a 1636 store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2 1637 %index.next = add i64 %index, 16 1638 %loop = icmp eq i64 %index.next, 16384 1639 br i1 %loop, label %for.end, label %vector.body 1640 1641for.end: ; preds = %vector.body 1642 ret void 1643 1644; SSE2-LABEL: test56: 1645; SSE2: pminub 1646 1647; AVX1-LABEL: test56: 1648; AVX1: vpminub 1649 1650; AVX2-LABEL: test56: 1651; AVX2: vpminub 1652} 1653 1654define void @test57(i16* nocapture %a, i16* nocapture %b) nounwind { 1655vector.ph: 1656 br label %vector.body 1657 1658vector.body: ; preds = %vector.body, %vector.ph 1659 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1660 %gep.a = getelementptr inbounds i16* %a, i64 %index 1661 %gep.b = getelementptr inbounds i16* %b, i64 %index 1662 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1663 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1664 %load.a = load <8 x i16>* %ptr.a, align 2 1665 %load.b = load <8 x i16>* %ptr.b, align 2 1666 %cmp = icmp slt <8 x i16> %load.a, %load.b 1667 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1668 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1669 %index.next = add i64 %index, 8 1670 %loop = icmp eq i64 %index.next, 16384 1671 br i1 %loop, label %for.end, label %vector.body 1672 1673for.end: ; preds = %vector.body 1674 ret void 1675 1676; SSE2-LABEL: test57: 1677; SSE2: pmaxsw 1678 1679; AVX1-LABEL: test57: 1680; AVX1: vpmaxsw 1681 1682; AVX2-LABEL: test57: 1683; AVX2: vpmaxsw 1684} 1685 1686define void 
@test58(i16* nocapture %a, i16* nocapture %b) nounwind { 1687vector.ph: 1688 br label %vector.body 1689 1690vector.body: ; preds = %vector.body, %vector.ph 1691 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1692 %gep.a = getelementptr inbounds i16* %a, i64 %index 1693 %gep.b = getelementptr inbounds i16* %b, i64 %index 1694 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1695 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1696 %load.a = load <8 x i16>* %ptr.a, align 2 1697 %load.b = load <8 x i16>* %ptr.b, align 2 1698 %cmp = icmp sle <8 x i16> %load.a, %load.b 1699 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1700 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1701 %index.next = add i64 %index, 8 1702 %loop = icmp eq i64 %index.next, 16384 1703 br i1 %loop, label %for.end, label %vector.body 1704 1705for.end: ; preds = %vector.body 1706 ret void 1707 1708; SSE2-LABEL: test58: 1709; SSE2: pmaxsw 1710 1711; AVX1-LABEL: test58: 1712; AVX1: vpmaxsw 1713 1714; AVX2-LABEL: test58: 1715; AVX2: vpmaxsw 1716} 1717 1718define void @test59(i16* nocapture %a, i16* nocapture %b) nounwind { 1719vector.ph: 1720 br label %vector.body 1721 1722vector.body: ; preds = %vector.body, %vector.ph 1723 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1724 %gep.a = getelementptr inbounds i16* %a, i64 %index 1725 %gep.b = getelementptr inbounds i16* %b, i64 %index 1726 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1727 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1728 %load.a = load <8 x i16>* %ptr.a, align 2 1729 %load.b = load <8 x i16>* %ptr.b, align 2 1730 %cmp = icmp sgt <8 x i16> %load.a, %load.b 1731 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1732 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1733 %index.next = add i64 %index, 8 1734 %loop = icmp eq i64 %index.next, 16384 1735 br i1 %loop, label %for.end, label %vector.body 1736 1737for.end: ; preds = %vector.body 1738 ret void 1739 1740; SSE2-LABEL: test59: 
1741; SSE2: pminsw 1742 1743; AVX1-LABEL: test59: 1744; AVX1: vpminsw 1745 1746; AVX2-LABEL: test59: 1747; AVX2: vpminsw 1748} 1749 1750define void @test60(i16* nocapture %a, i16* nocapture %b) nounwind { 1751vector.ph: 1752 br label %vector.body 1753 1754vector.body: ; preds = %vector.body, %vector.ph 1755 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1756 %gep.a = getelementptr inbounds i16* %a, i64 %index 1757 %gep.b = getelementptr inbounds i16* %b, i64 %index 1758 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1759 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1760 %load.a = load <8 x i16>* %ptr.a, align 2 1761 %load.b = load <8 x i16>* %ptr.b, align 2 1762 %cmp = icmp sge <8 x i16> %load.a, %load.b 1763 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1764 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1765 %index.next = add i64 %index, 8 1766 %loop = icmp eq i64 %index.next, 16384 1767 br i1 %loop, label %for.end, label %vector.body 1768 1769for.end: ; preds = %vector.body 1770 ret void 1771 1772; SSE2-LABEL: test60: 1773; SSE2: pminsw 1774 1775; AVX1-LABEL: test60: 1776; AVX1: vpminsw 1777 1778; AVX2-LABEL: test60: 1779; AVX2: vpminsw 1780} 1781 1782define void @test61(i16* nocapture %a, i16* nocapture %b) nounwind { 1783vector.ph: 1784 br label %vector.body 1785 1786vector.body: ; preds = %vector.body, %vector.ph 1787 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1788 %gep.a = getelementptr inbounds i16* %a, i64 %index 1789 %gep.b = getelementptr inbounds i16* %b, i64 %index 1790 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1791 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1792 %load.a = load <8 x i16>* %ptr.a, align 2 1793 %load.b = load <8 x i16>* %ptr.b, align 2 1794 %cmp = icmp ult <8 x i16> %load.a, %load.b 1795 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1796 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1797 %index.next = add i64 %index, 8 1798 %loop = icmp eq i64 %index.next, 
16384 1799 br i1 %loop, label %for.end, label %vector.body 1800 1801for.end: ; preds = %vector.body 1802 ret void 1803 1804; SSE4-LABEL: test61: 1805; SSE4: pmaxuw 1806 1807; AVX1-LABEL: test61: 1808; AVX1: vpmaxuw 1809 1810; AVX2-LABEL: test61: 1811; AVX2: vpmaxuw 1812} 1813 1814define void @test62(i16* nocapture %a, i16* nocapture %b) nounwind { 1815vector.ph: 1816 br label %vector.body 1817 1818vector.body: ; preds = %vector.body, %vector.ph 1819 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1820 %gep.a = getelementptr inbounds i16* %a, i64 %index 1821 %gep.b = getelementptr inbounds i16* %b, i64 %index 1822 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1823 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1824 %load.a = load <8 x i16>* %ptr.a, align 2 1825 %load.b = load <8 x i16>* %ptr.b, align 2 1826 %cmp = icmp ule <8 x i16> %load.a, %load.b 1827 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a 1828 store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2 1829 %index.next = add i64 %index, 8 1830 %loop = icmp eq i64 %index.next, 16384 1831 br i1 %loop, label %for.end, label %vector.body 1832 1833for.end: ; preds = %vector.body 1834 ret void 1835 1836; SSE4-LABEL: test62: 1837; SSE4: pmaxuw 1838 1839; AVX1-LABEL: test62: 1840; AVX1: vpmaxuw 1841 1842; AVX2-LABEL: test62: 1843; AVX2: vpmaxuw 1844} 1845 1846define void @test63(i16* nocapture %a, i16* nocapture %b) nounwind { 1847vector.ph: 1848 br label %vector.body 1849 1850vector.body: ; preds = %vector.body, %vector.ph 1851 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] 1852 %gep.a = getelementptr inbounds i16* %a, i64 %index 1853 %gep.b = getelementptr inbounds i16* %b, i64 %index 1854 %ptr.a = bitcast i16* %gep.a to <8 x i16>* 1855 %ptr.b = bitcast i16* %gep.b to <8 x i16>* 1856 %load.a = load <8 x i16>* %ptr.a, align 2 1857 %load.b = load <8 x i16>* %ptr.b, align 2 1858 %cmp = icmp ugt <8 x i16> %load.a, %load.b 1859 %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 
x i16> %load.a
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4-LABEL: test63:
; SSE4: pminuw

; AVX1-LABEL: test63:
; AVX1: vpminuw

; AVX2-LABEL: test63:
; AVX2: vpminuw
}

; test64: <8 x i16>, select(a uge b, b, a) -- unsigned min, expects pminuw.
define void @test64(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <8 x i16>*
  %ptr.b = bitcast i16* %gep.b to <8 x i16>*
  %load.a = load <8 x i16>* %ptr.a, align 2
  %load.b = load <8 x i16>* %ptr.b, align 2
  %cmp = icmp uge <8 x i16> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
  store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4-LABEL: test64:
; SSE4: pminuw

; AVX1-LABEL: test64:
; AVX1: vpminuw

; AVX2-LABEL: test64:
; AVX2: vpminuw
}

; ---------------------------------------------------------------------------
; <4 x i32> cases (test65 - test72).
; The select returns %load.b when the compare is true, so a less-than style
; predicate produces a max and a greater-than style predicate produces a min.
; ---------------------------------------------------------------------------

; test65: <4 x i32>, select(a slt b, b, a) -- signed max, expects pmaxsd.
define void @test65(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp slt <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4-LABEL: test65:
; SSE4: pmaxsd

; AVX1-LABEL: test65:
; AVX1: vpmaxsd

; AVX2-LABEL: test65:
; AVX2: vpmaxsd
}

; test66: <4 x i32>, select(a sle b, b, a) -- signed max, expects pmaxsd.
define void @test66(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp sle <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4-LABEL: test66:
; SSE4: pmaxsd

; AVX1-LABEL: test66:
; AVX1: vpmaxsd

; AVX2-LABEL: test66:
; AVX2: vpmaxsd
}

; test67: <4 x i32>, select(a sgt b, b, a) -- signed min, expects pminsd.
define void @test67(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp sgt <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4-LABEL: test67:
; SSE4: pminsd

; AVX1-LABEL: test67:
; AVX1: vpminsd

; AVX2-LABEL: test67:
; AVX2: vpminsd
}

; test68: <4 x i32>, select(a sge b, b, a) -- signed min, expects pminsd.
define void @test68(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp sge <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4-LABEL: test68:
; SSE4: pminsd

; AVX1-LABEL: test68:
; AVX1: vpminsd

; AVX2-LABEL: test68:
; AVX2: vpminsd
}

; test69: <4 x i32>, select(a ult b, b, a) -- unsigned max, expects pmaxud.
define void @test69(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp ult <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4-LABEL: test69:
; SSE4: pmaxud

; AVX1-LABEL: test69:
; AVX1: vpmaxud

; AVX2-LABEL: test69:
; AVX2: vpmaxud
}

; test70: <4 x i32>, select(a ule b, b, a) -- unsigned max, expects pmaxud.
define void @test70(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp ule <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4-LABEL: test70:
; SSE4: pmaxud

; AVX1-LABEL: test70:
; AVX1: vpmaxud

; AVX2-LABEL: test70:
; AVX2: vpmaxud
}

; test71: <4 x i32>, select(a ugt b, b, a) -- unsigned min, expects pminud.
define void @test71(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp ugt <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4-LABEL: test71:
; SSE4: pminud

; AVX1-LABEL: test71:
; AVX1: vpminud

; AVX2-LABEL: test71:
; AVX2: vpminud
}

; test72: <4 x i32>, select(a uge b, b, a) -- unsigned min, expects pminud.
define void @test72(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <4 x i32>*
  %ptr.b = bitcast i32* %gep.b to <4 x i32>*
  %load.a = load <4 x i32>* %ptr.a, align 2
  %load.b = load <4 x i32>* %ptr.b, align 2
  %cmp = icmp uge <4 x i32> %load.a, %load.b
  %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
  store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 4
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE4-LABEL: test72:
; SSE4: pminud

; AVX1-LABEL: test72:
; AVX1: vpminud

; AVX2-LABEL: test72:
; AVX2: vpminud
}

; ---------------------------------------------------------------------------
; <32 x i8> cases (test73 - test80), 256-bit vectors.
; Same swapped-select pattern as above; the loop steps 32 elements at a time.
; ---------------------------------------------------------------------------

; test73: <32 x i8>, select(a slt b, b, a) -- signed max, expects vpmaxsb.
define void @test73(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp slt <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test73:
; AVX2: vpmaxsb
}

; test74: <32 x i8>, select(a sle b, b, a) -- signed max, expects vpmaxsb.
define void @test74(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp sle <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test74:
; AVX2: vpmaxsb
}

; test75: <32 x i8>, select(a sgt b, b, a) -- signed min, expects vpminsb.
define void @test75(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp sgt <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test75:
; AVX2: vpminsb
}

; test76: <32 x i8>, select(a sge b, b, a) -- signed min, expects vpminsb.
define void @test76(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp sge <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test76:
; AVX2: vpminsb
}

; test77: <32 x i8>, select(a ult b, b, a) -- unsigned max, expects vpmaxub.
define void @test77(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp ult <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test77:
; AVX2: vpmaxub
}

; test78: <32 x i8>, select(a ule b, b, a) -- unsigned max, expects vpmaxub.
define void @test78(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp ule <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test78:
; AVX2: vpmaxub
}

; test79: <32 x i8>, select(a ugt b, b, a) -- unsigned min, expects vpminub.
define void @test79(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp ugt <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test79:
; AVX2: vpminub
}

; test80: <32 x i8>, select(a uge b, b, a) -- unsigned min, expects vpminub.
define void @test80(i8* nocapture %a, i8* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i8* %a, i64 %index
  %gep.b = getelementptr inbounds i8* %b, i64 %index
  %ptr.a = bitcast i8* %gep.a to <32 x i8>*
  %ptr.b = bitcast i8* %gep.b to <32 x i8>*
  %load.a = load <32 x i8>* %ptr.a, align 2
  %load.b = load <32 x i8>* %ptr.b, align 2
  %cmp = icmp uge <32 x i8> %load.a, %load.b
  %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
  store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
  %index.next = add i64 %index, 32
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test80:
; AVX2: vpminub
}

; ---------------------------------------------------------------------------
; <16 x i16> cases (test81 - test88), 256-bit vectors.
; Same swapped-select pattern; the loop steps 16 elements at a time.
; ---------------------------------------------------------------------------

; test81: <16 x i16>, select(a slt b, b, a) -- signed max, expects vpmaxsw.
define void @test81(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
  %load.b = load <16 x i16>* %ptr.b, align 2
  %cmp = icmp slt <16 x i16> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test81:
; AVX2: vpmaxsw
}

; test82: <16 x i16>, select(a sle b, b, a) -- signed max, expects vpmaxsw.
define void @test82(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
  %load.b = load <16 x i16>* %ptr.b, align 2
  %cmp = icmp sle <16 x i16> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test82:
; AVX2: vpmaxsw
}

; test83: <16 x i16>, select(a sgt b, b, a) -- signed min, expects vpminsw.
define void @test83(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
  %load.b = load <16 x i16>* %ptr.b, align 2
  %cmp = icmp sgt <16 x i16> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test83:
; AVX2: vpminsw
}

; test84: <16 x i16>, select(a sge b, b, a) -- signed min, expects vpminsw.
define void @test84(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
  %load.b = load <16 x i16>* %ptr.b, align 2
  %cmp = icmp sge <16 x i16> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test84:
; AVX2: vpminsw
}

; test85: <16 x i16>, select(a ult b, b, a) -- unsigned max, expects vpmaxuw.
define void @test85(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
  %load.b = load <16 x i16>* %ptr.b, align 2
  %cmp = icmp ult <16 x i16> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test85:
; AVX2: vpmaxuw
}

; test86: <16 x i16>, select(a ule b, b, a) -- unsigned max, expects vpmaxuw.
define void @test86(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
  %load.b = load <16 x i16>* %ptr.b, align 2
  %cmp = icmp ule <16 x i16> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test86:
; AVX2: vpmaxuw
}

; test87: <16 x i16>, select(a ugt b, b, a) -- unsigned min, expects vpminuw.
define void @test87(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
  %load.b = load <16 x i16>* %ptr.b, align 2
  %cmp = icmp ugt <16 x i16> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test87:
; AVX2: vpminuw
}

; test88: <16 x i16>, select(a uge b, b, a) -- unsigned min, expects vpminuw.
define void @test88(i16* nocapture %a, i16* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i16* %a, i64 %index
  %gep.b = getelementptr inbounds i16* %b, i64 %index
  %ptr.a = bitcast i16* %gep.a to <16 x i16>*
  %ptr.b = bitcast i16* %gep.b to <16 x i16>*
  %load.a = load <16 x i16>* %ptr.a, align 2
  %load.b = load <16 x i16>* %ptr.b, align 2
  %cmp = icmp uge <16 x i16> %load.a, %load.b
  %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
  store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
  %index.next = add i64 %index, 16
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test88:
; AVX2: vpminuw
}

; ---------------------------------------------------------------------------
; <8 x i32> cases (test89 - test96), 256-bit vectors.
; Same swapped-select pattern; the loop steps 8 elements at a time.
; ---------------------------------------------------------------------------

; test89: <8 x i32>, select(a slt b, b, a) -- signed max, expects vpmaxsd.
define void @test89(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i32>*
  %ptr.b = bitcast i32* %gep.b to <8 x i32>*
  %load.a = load <8 x i32>* %ptr.a, align 2
  %load.b = load <8 x i32>* %ptr.b, align 2
  %cmp = icmp slt <8 x i32> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
  store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test89:
; AVX2: vpmaxsd
}

; test90: <8 x i32>, select(a sle b, b, a) -- signed max, expects vpmaxsd.
define void @test90(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i32>*
  %ptr.b = bitcast i32* %gep.b to <8 x i32>*
  %load.a = load <8 x i32>* %ptr.a, align 2
  %load.b = load <8 x i32>* %ptr.b, align 2
  %cmp = icmp sle <8 x i32> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
  store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test90:
; AVX2: vpmaxsd
}

; test91: <8 x i32>, select(a sgt b, b, a) -- signed min, expects vpminsd.
define void @test91(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i32>*
  %ptr.b = bitcast i32* %gep.b to <8 x i32>*
  %load.a = load <8 x i32>* %ptr.a, align 2
  %load.b = load <8 x i32>* %ptr.b, align 2
  %cmp = icmp sgt <8 x i32> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
  store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test91:
; AVX2: vpminsd
}

; test92: <8 x i32>, select(a sge b, b, a) -- signed min, expects vpminsd.
define void @test92(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i32>*
  %ptr.b = bitcast i32* %gep.b to <8 x i32>*
  %load.a = load <8 x i32>* %ptr.a, align 2
  %load.b = load <8 x i32>* %ptr.b, align 2
  %cmp = icmp sge <8 x i32> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
  store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test92:
; AVX2: vpminsd
}

; test93: <8 x i32>, select(a ult b, b, a) -- unsigned max, expects vpmaxud.
define void @test93(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i32>*
  %ptr.b = bitcast i32* %gep.b to <8 x i32>*
  %load.a = load <8 x i32>* %ptr.a, align 2
  %load.b = load <8 x i32>* %ptr.b, align 2
  %cmp = icmp ult <8 x i32> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
  store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test93:
; AVX2: vpmaxud
}

; test94: <8 x i32>, select(a ule b, b, a) -- unsigned max, expects vpmaxud.
define void @test94(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i32>*
  %ptr.b = bitcast i32* %gep.b to <8 x i32>*
  %load.a = load <8 x i32>* %ptr.a, align 2
  %load.b = load <8 x i32>* %ptr.b, align 2
  %cmp = icmp ule <8 x i32> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
  store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test94:
; AVX2: vpmaxud
}

; test95: <8 x i32>, select(a ugt b, b, a) -- unsigned min, expects vpminud.
define void @test95(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i32>*
  %ptr.b = bitcast i32* %gep.b to <8 x i32>*
  %load.a = load <8 x i32>* %ptr.a, align 2
  %load.b = load <8 x i32>* %ptr.b, align 2
  %cmp = icmp ugt <8 x i32> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
  store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test95:
; AVX2: vpminud
}

; test96: <8 x i32>, select(a uge b, b, a) -- unsigned min, expects vpminud.
define void @test96(i32* nocapture %a, i32* nocapture %b) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %gep.a = getelementptr inbounds i32* %a, i64 %index
  %gep.b = getelementptr inbounds i32* %b, i64 %index
  %ptr.a = bitcast i32* %gep.a to <8 x i32>*
  %ptr.b = bitcast i32* %gep.b to <8 x i32>*
  %load.a = load <8 x i32>* %ptr.a, align 2
  %load.b = load <8 x i32>* %ptr.b, align 2
  %cmp = icmp uge <8 x i32> %load.a, %load.b
  %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
  store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
  %index.next = add i64 %index, 8
  %loop = icmp eq i64 %index.next, 16384
  br i1 %loop, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2-LABEL: test96:
; AVX2: vpminud
}