; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; Codegen tests for 512-bit (zmm) arithmetic when targeting KNL.
; Each function below is paired with FileCheck directives that pin the
; instruction(s) llc is expected to select for it.

;===----------------------------------------------------------------------===;
; Packed floating-point add/sub/mul/div: register-register forms and forms
; where the second operand is folded from memory or the constant pool.
;===----------------------------------------------------------------------===;

define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

; A 64-bit element multiply is expected to be expanded into a sequence of
; vpmuludq / shift / vpaddq instructions, as pinned by the directives below.
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; CHECK-LABEL: imulq512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
; CHECK-NEXT: vpsrlq $32, %zmm0, %zmm3
; CHECK-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; CHECK-NEXT: vpsllq $32, %zmm3, %zmm3
; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; CHECK-NEXT: vpsrlq $32, %zmm1, %zmm1
; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vpsllq $32, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

;===----------------------------------------------------------------------===;
; Packed integer add: plain, memory-folded, and {1toN} broadcast operands,
; then merge-masked ({%k1}) and zero-masked ({%k1} {z}) variants.
;===----------------------------------------------------------------------===;

define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}

; The splat is built with eight insertelement instructions; the directives
; expect it to be folded into a single {1to8} broadcast memory operand.
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

;===----------------------------------------------------------------------===;
; Packed integer subtract and 32-bit multiply.
;===----------------------------------------------------------------------===;

define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

;===----------------------------------------------------------------------===;
; Square root: libm calls (sqrtf / sqrt) and llvm.sqrt.* intrinsics, scalar
; and 512-bit vector.
;===----------------------------------------------------------------------===;

declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

;===----------------------------------------------------------------------===;
; Splat-constant and splat-load operands folded as {1toN} broadcasts, plus
; bitwise and/or with folded operands.
;===----------------------------------------------------------------------===;

define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-LABEL: andd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %a = load <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %a = load i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

;===----------------------------------------------------------------------===;
; Masked floating-point arithmetic. Each function selects between the
; operation result and a pass-through value (or zero) under a mask derived
; from `icmp ne %mask1, 0`. The directives here only require the masked
; instruction form to appear; they do not pin exact registers.
;===----------------------------------------------------------------------===;

; CHECK-LABEL: test_mask_vaddps:
; CHECK: vaddps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; CHECK-LABEL: test_mask_vmulps:
; CHECK: vmulps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; min/max are expressed as fcmp + select; the directives expect them to be
; matched to vmin*/vmax* with a mask.

; CHECK-LABEL: test_mask_vminps:
; CHECK: vminps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

; CHECK-LABEL: test_mask_vminpd:
; CHECK: vminpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

; CHECK-LABEL: test_mask_vmaxps:
; CHECK: vmaxps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

; CHECK-LABEL: test_mask_vmaxpd:
; CHECK: vmaxpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

; CHECK-LABEL: test_mask_vsubps:
; CHECK: vsubps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; CHECK-LABEL: test_mask_vdivps:
; CHECK: vdivps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; CHECK-LABEL: test_mask_vaddpd:
; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
                                     <8 x double> %j, <8 x i64> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

; CHECK-LABEL: test_maskz_vaddpd:
; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
                                      <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
                                     <8 x double>* %j, <8 x i64> %mask1)
                                     nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
                                      <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*}}
; CHECK: ret
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
  %tmp = load double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}

; NOTE(review): %dst is never used in this function; the select falls back to
; %i, whereas test_mask_fold_vaddpd falls back to %dst. Confirm whether that
; is intentional before changing it, since it affects the expected registers.
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]}.*}}
; CHECK: ret
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
                                      double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}

; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
                                       <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}