; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s

define i32 @test1(float %x) {
; CHECK-LABEL: test1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovd %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = bitcast float %x to i32
   ret i32 %res
}

define <4 x i32> @test2(i32 %x) {
; CHECK-LABEL: test2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovd %edi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <4 x i32>undef, i32 %x, i32 0
   ret <4 x i32>%res
}

define <2 x i64> @test3(i64 %x) {
; CHECK-LABEL: test3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovq %rdi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <2 x i64>undef, i64 %x, i32 0
   ret <2 x i64>%res
}

define <4 x i32> @test4(i32* %x) {
; CHECK-LABEL: test4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x
   %res = insertelement <4 x i32>undef, i32 %y, i32 0
   ret <4 x i32>%res
}

define void @test5(float %x, float* %y) {
; CHECK-LABEL: test5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   store float %x, float* %y, align 4
   ret void
}

define void @test6(double %x, double* %y) {
; CHECK-LABEL: test6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   store double %x, double* %y, align 8
   ret void
}

define float @test7(i32* %x) {
; CHECK-LABEL: test7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x
   %res = bitcast i32 %y to float
   ret float %res
}

define i32 @test8(<4 x i32> %x) {
; CHECK-LABEL: test8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovd %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = extractelement <4 x i32> %x, i32 0
   ret i32 %res
}

define i64 @test9(<2 x i64> %x) {
; CHECK-LABEL: test9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovq %xmm0, %rax ## EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = extractelement <2 x i64> %x, i32 0
   ret i64 %res
}

define <4 x i32> @test10(i32* %x) {
; CHECK-LABEL: test10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x, align 4
   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
   ret <4 x i32>%res
}

define <4 x float> @test11(float* %x) {
; CHECK-LABEL: test11:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load float, float* %x, align 4
   %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
   ret <4 x float>%res
}

define <2 x double> @test12(double* %x) {
; CHECK-LABEL: test12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load double, double* %x, align 8
   %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
   ret <2 x double>%res
}

define <2 x i64> @test13(i64 %x) {
; CHECK-LABEL: test13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovq %rdi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
   ret <2 x i64>%res
}

define <4 x i32> @test14(i32 %x) {
; CHECK-LABEL: test14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovd %edi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
   ret <4 x i32>%res
}

define <4 x i32> @test15(i32* %x) {
; CHECK-LABEL: test15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x, align 4
   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
   ret <4 x i32>%res
}

define <16 x i32> @test16(i8 * %addr) {
; CHECK-LABEL: test16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
  ret <16 x i32>%res
}

define <16 x i32> @test17(i8 * %addr) {
; CHECK-LABEL: test17:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
  ret <16 x i32>%res
}

define void @test18(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test18:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64>%data, <8 x i64>* %vaddr, align 64
  ret void
}

define void @test19(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test19:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32>%data, <16 x i32>* %vaddr, align 1
  ret void
}

define void @test20(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test20:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32>%data, <16 x i32>* %vaddr, align 64
  ret void
}

define <8 x i64> @test21(i8 * %addr) {
; CHECK-LABEL: test21:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
  ret <8 x i64>%res
}

define void @test22(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test22:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64>%data, <8 x i64>* %vaddr, align 1
  ret void
}

define <8 x i64> @test23(i8 * %addr) {
; CHECK-LABEL: test23:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
  ret <8 x i64>%res
}

define void @test24(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test24:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double>%data, <8 x double>* %vaddr, align 64
  ret void
}

define <8 x double> @test25(i8 * %addr) {
; CHECK-LABEL: test25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 64
  ret <8 x double>%res
}

define void @test26(i8 * %addr, <16 x float> %data) {
; CHECK-LABEL: test26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float>%data, <16 x float>* %vaddr, align 64
  ret void
}

define <16 x float> @test27(i8 * %addr) {
; CHECK-LABEL: test27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 64
  ret <16 x float>%res
}

define void @test28(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double>%data, <8 x double>* %vaddr, align 1
  ret void
}

define <8 x double> @test29(i8 * %addr) {
; CHECK-LABEL: test29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 1
  ret <8 x double>%res
}

define void @test30(i8 * %addr, <16 x float> %data) {
; CHECK-LABEL: test30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float>%data, <16 x float>* %vaddr, align 1
  ret void
}

define <16 x float> @test31(i8 * %addr) {
; CHECK-LABEL: test31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 1
  ret <16 x float>%res
}

define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
; CHECK-LABEL: test32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32>%res
}

define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
; CHECK-LABEL: test33:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x48,0x27,0xc9]
; CHECK-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32>%res
}

define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
; CHECK-LABEL: test34:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc8]
; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32>%res
}

define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
; CHECK-LABEL: test35:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc8]
; CHECK-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32>%res
}

define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
; CHECK-LABEL: test36:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64>%res
}

define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
; CHECK-LABEL: test37:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x27,0xc9]
; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64>%res
}

define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
; CHECK-LABEL: test38:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc8]
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64>%res
}

define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
; CHECK-LABEL: test39:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc8]
; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64>%res
}

define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; CHECK-LABEL: test40:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float>%res
}

define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; CHECK-LABEL: test41:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float>%res
}

define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
; CHECK-LABEL: test42:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float>%res
}

define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
; CHECK-LABEL: test43:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float>%res
}

define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; CHECK-LABEL: test44:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double>%res
}

define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; CHECK-LABEL: test45:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double>%res
}

define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
; CHECK-LABEL: test46:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double>%res
}

define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
; CHECK-LABEL: test47:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovupd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double>%res
}