1; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s 2 3declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone 4; CHECK-LABEL: test_kortestz 5; CHECK: kortestw 6; CHECK: sete 7define i32 @test_kortestz(i16 %a0, i16 %a1) { 8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1) 9 ret i32 %res 10} 11 12declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone 13; CHECK-LABEL: test_kortestc 14; CHECK: kortestw 15; CHECK: sbbl 16define i32 @test_kortestc(i16 %a0, i16 %a1) { 17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1) 18 ret i32 %res 19} 20 21declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone 22; CHECK-LABEL: test_kand 23; CHECK: kandw 24; CHECK: kandw 25define i16 @test_kand(i16 %a0, i16 %a1) { 26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8) 27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1) 28 ret i16 %t2 29} 30 31declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone 32; CHECK-LABEL: test_knot 33; CHECK: knotw 34define i16 @test_knot(i16 %a0) { 35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0) 36 ret i16 %res 37} 38 39declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone 40 41; CHECK-LABEL: unpckbw_test 42; CHECK: kunpckbw 43; CHECK:ret 44define i16 @unpckbw_test(i16 %a0, i16 %a1) { 45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1) 46 ret i16 %res 47} 48 49define <16 x float> @test_rcp_ps_512(<16 x float> %a0) { 50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0] 51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] 52 ret <16 x float> %res 53} 54declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone 55 56define <8 x double> @test_rcp_pd_512(<8 x double> %a0) { 57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0] 58 %res = call <8 x double> 
@llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1] 59 ret <8 x double> %res 60} 61declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone 62 63declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32) 64 65define <8 x double> @test7(<8 x double> %a) { 66; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b] 67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4) 68 ret <8 x double>%res 69} 70 71declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32) 72 73define <16 x float> @test8(<16 x float> %a) { 74; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b] 75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4) 76 ret <16 x float>%res 77} 78 79define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) { 80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0] 81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] 82 ret <16 x float> %res 83} 84declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone 85 86define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { 87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0] 88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] 89 ret <4 x float> %res 90} 91declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 92 93define <4 x float> @test_rcp14_ss(<4 x float> %a0) { 94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0] 95 %res = call <4 x float> 
@llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] 96 ret <4 x float> %res 97} 98declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 99 100define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { 101 ; CHECK: vsqrtpd 102 %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1] 103 ret <8 x double> %res 104} 105declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone 106 107define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { 108 ; CHECK: vsqrtps 109 %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1] 110 ret <16 x float> %res 111} 112declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone 113 114define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) { 115 ; CHECK: vsqrtss {{.*}}encoding: [0x62 116 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 117 ret <4 x float> %res 118} 119declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone 120 121define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) { 122 ; CHECK: vsqrtsd {{.*}}encoding: [0x62 123 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 124 ret <2 x double> %res 125} 126declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone 127 128define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { 129 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62 130 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1] 131 ret i64 %res 132} 133declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind 
readnone 134 135define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { 136 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62 137 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] 138 ret <2 x double> %res 139} 140declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone 141 142define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) { 143 ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62 144 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] 145 ret <2 x double> %res 146} 147declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone 148 149define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) { 150 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62 151 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1] 152 ret i64 %res 153} 154declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone 155 156 157define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { 158 ; CHECK: vcvtss2si {{.*}}encoding: [0x62 159 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] 160 ret i64 %res 161} 162declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone 163 164 165define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { 166 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62 167 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] 168 ret <4 x float> %res 169} 170declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone 171 172 173define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) { 174 ; CHECK: vcvttss2si {{.*}}encoding: [0x62 175 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1] 176 ret i64 %res 177} 178declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone 179 180define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> 
%a0) { 181 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62 182 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1] 183 ret i64 %res 184} 185declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone 186 187define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) { 188 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0] 189 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) 190 ret <16 x float> %res 191} 192declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly 193 194 195define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) { 196 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02] 197 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1) 198 ret <16 x i16> %res 199} 200 201declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly 202 203define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) { 204 ; CHECK: vbroadcastss 205 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1] 206 ret <16 x float> %res 207} 208declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly 209 210define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) { 211 ; CHECK: vbroadcastsd 212 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1] 213 ret <8 x double> %res 214} 215declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly 216 217define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) { 218 ; CHECK: vbroadcastss 219 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1] 220 ret <16 x float> %res 221} 222declare <16 x float> 
@llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly 223 224define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) { 225 ; CHECK: vbroadcastsd 226 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1] 227 ret <8 x double> %res 228} 229declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly 230 231define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) { 232 ; CHECK: vpbroadcastd 233 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1] 234 ret <16 x i32> %res 235} 236declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly 237 238define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) { 239 ; CHECK: vpbroadcastd 240 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1] 241 ret <16 x i32> %res 242} 243declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly 244 245define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) { 246 ; CHECK: vpbroadcastq 247 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1] 248 ret <8 x i64> %res 249} 250declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly 251 252define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) { 253 ; CHECK: vpbroadcastq 254 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1] 255 ret <8 x i64> %res 256} 257declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly 258 259define <16 x i32> @test_conflict_d(<16 x i32> %a) { 260 ; CHECK: movw $-1, %ax 261 ; CHECK: vpxor 262 ; CHECK: vpconflictd 263 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) 264 ret <16 x i32> %res 265} 266 267declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly 
268 269define <8 x i64> @test_conflict_q(<8 x i64> %a) { 270 ; CHECK: movb $-1, %al 271 ; CHECK: vpxor 272 ; CHECK: vpconflictq 273 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) 274 ret <8 x i64> %res 275} 276 277declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly 278 279define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { 280 ; CHECK: vpconflictd 281 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask) 282 ret <16 x i32> %res 283} 284 285define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 286 ; CHECK: vpconflictq 287 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 288 ret <8 x i64> %res 289} 290 291define <16 x i32> @test_lzcnt_d(<16 x i32> %a) { 292 ; CHECK: movw $-1, %ax 293 ; CHECK: vpxor 294 ; CHECK: vplzcntd 295 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) 296 ret <16 x i32> %res 297} 298 299declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly 300 301define <8 x i64> @test_lzcnt_q(<8 x i64> %a) { 302 ; CHECK: movb $-1, %al 303 ; CHECK: vpxor 304 ; CHECK: vplzcntq 305 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) 306 ret <8 x i64> %res 307} 308 309declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly 310 311 312define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 313 ; CHECK: vplzcntd 314 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 315 ret <16 x i32> %res 316} 317 318define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 319 ; CHECK: vplzcntq 320 %res = call <8 x i64> 
@llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 321 ret <8 x i64> %res 322} 323 324define <16 x i32> @test_ctlz_d(<16 x i32> %a) { 325 ; CHECK-LABEL: test_ctlz_d 326 ; CHECK: vplzcntd 327 %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) 328 ret <16 x i32> %res 329} 330 331declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly 332 333define <8 x i64> @test_ctlz_q(<8 x i64> %a) { 334 ; CHECK-LABEL: test_ctlz_q 335 ; CHECK: vplzcntq 336 %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) 337 ret <8 x i64> %res 338} 339 340declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly 341 342define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) { 343 ; CHECK: vblendmps %zmm1, %zmm0 344 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1] 345 ret <16 x float> %res 346} 347 348declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly 349 350define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) { 351 ; CHECK: vblendmpd %zmm1, %zmm0 352 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1] 353 ret <8 x double> %res 354} 355 356define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) { 357 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop 358 ; CHECK: vblendmpd (% 359 %b = load <8 x double>* %ptr 360 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1] 361 ret <8 x double> %res 362} 363declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly 364 365define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) { 366 ; CHECK: vpblendmd 
367 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1] 368 ret <16 x i32> %res 369} 370declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly 371 372define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) { 373 ; CHECK: vpblendmq 374 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1] 375 ret <8 x i64> %res 376} 377declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly 378 379 define <8 x i32> @test_cvtpd2udq(<8 x double> %a) { 380 ;CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0] 381 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32>zeroinitializer, i8 -1, i32 2) 382 ret <8 x i32>%res 383 } 384 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32) 385 386 define <16 x i32> @test_cvtps2udq(<16 x float> %a) { 387 ;CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0] 388 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32>zeroinitializer, i16 -1, i32 1) 389 ret <16 x i32>%res 390 } 391 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32) 392 393 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) { 394 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02] 395 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8) 396 ret i16 %res 397 } 398 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32) 399 400 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) { 401 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04] 402 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 
-1, i32 4) 403 ret i8 %res 404 } 405 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32) 406 407 ; cvt intrinsics 408 define <16 x float> @test_cvtdq2ps(<16 x i32> %a) { 409 ;CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0] 410 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1) 411 ret <16 x float>%res 412 } 413 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32) 414 415 define <16 x float> @test_cvtudq2ps(<16 x i32> %a) { 416 ;CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0] 417 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1) 418 ret <16 x float>%res 419 } 420 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32) 421 422 define <8 x double> @test_cvtdq2pd(<8 x i32> %a) { 423 ;CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0] 424 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1) 425 ret <8 x double>%res 426 } 427 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8) 428 429 define <8 x double> @test_cvtudq2pd(<8 x i32> %a) { 430 ;CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0] 431 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1) 432 ret <8 x double>%res 433 } 434 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8) 435 436 ; fp min - max 437define <16 x float> @test_vmaxps(<16 x float> %a0, <16 x float> %a1) { 438 ; CHECK: vmaxps 439 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, 440 <16 x float>zeroinitializer, i16 -1, i32 4) 441 ret <16 x float> %res 442} 443declare <16 x float> 
@llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, 444 <16 x float>, i16, i32) 445 446define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) { 447 ; CHECK: vmaxpd 448 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1, 449 <8 x double>zeroinitializer, i8 -1, i32 4) 450 ret <8 x double> %res 451} 452declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>, 453 <8 x double>, i8, i32) 454 455define <16 x float> @test_vminps(<16 x float> %a0, <16 x float> %a1) { 456 ; CHECK: vminps 457 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, 458 <16 x float>zeroinitializer, i16 -1, i32 4) 459 ret <16 x float> %res 460} 461declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, 462 <16 x float>, i16, i32) 463 464define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) { 465 ; CHECK: vminpd 466 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1, 467 <8 x double>zeroinitializer, i8 -1, i32 4) 468 ret <8 x double> %res 469} 470declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>, 471 <8 x double>, i8, i32) 472 473 define <8 x float> @test_cvtpd2ps(<8 x double> %a) { 474 ;CHECK: vcvtpd2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0xfd,0x38,0x5a,0xc0] 475 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %a, <8 x float>zeroinitializer, i8 -1, i32 1) 476 ret <8 x float>%res 477 } 478 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32) 479 480 define <16 x i32> @test_pabsd(<16 x i32> %a) { 481 ;CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0] 482 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32>zeroinitializer, i16 -1) 483 ret < 16 x i32> %res 484 } 485 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) 486 487 define 
<8 x i64> @test_pabsq(<8 x i64> %a) { 488 ;CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0] 489 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64>zeroinitializer, i8 -1) 490 ret <8 x i64> %res 491 } 492 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) 493 494define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) { 495 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1] 496 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1, 497 <8 x i64>zeroinitializer, i8 -1) 498 ret <8 x i64> %res 499} 500declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 501 502define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) { 503 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1] 504 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1, 505 <16 x i32>zeroinitializer, i16 -1) 506 ret <16 x i32> %res 507} 508declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 509 510define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) { 511 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1] 512 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1, 513 <16 x i32>zeroinitializer, i16 -1) 514 ret <16 x i32> %res 515} 516declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 517 518define <8 x i64> @test_vpmuludq(<16 x i32> %a0, <16 x i32> %a1) { 519 ; CHECK: vpmuludq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1] 520 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a0, <16 x i32> %a1, 521 <8 x i64>zeroinitializer, i8 -1) 522 ret <8 x i64> %res 523} 524declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8) 525 526define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) { 527 ; CHECK: vptestmq 
{{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1] 528 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1) 529 ret i8 %res 530} 531declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8) 532 533define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) { 534 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1] 535 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1) 536 ret i16 %res 537} 538declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16) 539 540define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) { 541; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07] 542 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask) 543 ret void 544} 545 546declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 ) 547 548define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) { 549; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07] 550 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask) 551 ret void 552} 553 554declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8 ) 555 556define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) { 557; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1] 558 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 -1) 559 ret <16 x float> %res 560} 561 562define <16 x float> @test_vpermt2ps_mask(<16 x float>%x, <16 x float>%y, <16 x i32>%perm, i16 %mask) { 563; CHECK-LABEL: test_vpermt2ps_mask: 564; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1] 565 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 %mask) 566 ret <16 x float> %res 567} 568 569declare <16 x float> 
@llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16) 570 571define <8 x i64> @test_vmovntdqa(i8 *%x) { 572; CHECK-LABEL: test_vmovntdqa: 573; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07] 574 %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x) 575 ret <8 x i64> %res 576} 577 578declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*) 579 580define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) { 581; CHECK-LABEL: test_valign_q: 582; CHECK: valignq $2, %zmm1, %zmm0, %zmm0 583 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1) 584 ret <8 x i64> %res 585} 586 587define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) { 588; CHECK-LABEL: test_mask_valign_q: 589; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1} 590 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask) 591 ret <8 x i64> %res 592} 593 594declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8) 595 596define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 597; CHECK-LABEL: test_maskz_valign_d: 598; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05] 599 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask) 600 ret <16 x i32> %res 601} 602 603declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16) 604 605define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) { 606 ; CHECK-LABEL: test_mask_store_ss 607 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07] 608 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask) 609 ret void 610} 611 612declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 
x float>, i8 ) 613 614define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) { 615; CHECK-LABEL: test_pcmpeq_d 616; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ## 617 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) 618 ret i16 %res 619} 620 621define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 622; CHECK-LABEL: test_mask_pcmpeq_d 623; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## 624 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 625 ret i16 %res 626} 627 628declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16) 629 630define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) { 631; CHECK-LABEL: test_pcmpeq_q 632; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ## 633 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) 634 ret i8 %res 635} 636 637define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 638; CHECK-LABEL: test_mask_pcmpeq_q 639; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## 640 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 641 ret i8 %res 642} 643 644declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8) 645 646define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) { 647; CHECK-LABEL: test_pcmpgt_d 648; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ## 649 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) 650 ret i16 %res 651} 652 653define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 654; CHECK-LABEL: test_mask_pcmpgt_d 655; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ## 656 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 657 ret i16 %res 658} 659 660declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16) 661 662define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) { 663; CHECK-LABEL: test_pcmpgt_q 664; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ## 665 
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) 666 ret i8 %res 667} 668 669define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 670; CHECK-LABEL: test_mask_pcmpgt_q 671; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ## 672 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 673 ret i8 %res 674} 675 676declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8) 677 678define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) { 679; CHECK-LABEL: test_cmp_d_512 680; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ## 681 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1) 682 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 683; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ## 684 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1) 685 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 686; CHECK: vpcmpled %zmm1, %zmm0, %k0 ## 687 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1) 688 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 689; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ## 690 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1) 691 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 692; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ## 693 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1) 694 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 695; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ## 696 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1) 697 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 698; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ## 699 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1) 700 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 701; CHECK: 
vpcmpordd %zmm1, %zmm0, %k0 ## 702 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1) 703 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 704 ret <8 x i16> %vec7 705} 706 707define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 708; CHECK-LABEL: test_mask_cmp_d_512 709; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## 710 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask) 711 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 712; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ## 713 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask) 714 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 715; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ## 716 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask) 717 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 718; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ## 719 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask) 720 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 721; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ## 722 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask) 723 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 724; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ## 725 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask) 726 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 727; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ## 728 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask) 729 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 730; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ## 731 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask) 732 %vec7 = 
insertelement <8 x i16> %vec6, i16 %res7, i32 7 733 ret <8 x i16> %vec7 734} 735 736declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone 737 738define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) { 739; CHECK-LABEL: test_ucmp_d_512 740; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ## 741 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1) 742 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 743; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ## 744 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1) 745 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 746; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ## 747 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1) 748 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 749; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ## 750 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1) 751 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 752; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ## 753 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1) 754 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 755; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ## 756 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1) 757 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 758; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ## 759 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1) 760 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 761; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ## 762 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1) 763 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 764 ret <8 x i16> %vec7 765} 766 767define <8 x i16> 
@test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 768; CHECK_LABEL: test_mask_ucmp_d_512 769; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ## 770 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask) 771 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 772; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ## 773 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask) 774 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 775; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ## 776 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask) 777 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 778; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ## 779 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask) 780 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 781; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ## 782 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask) 783 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 784; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ## 785 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask) 786 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 787; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ## 788 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask) 789 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 790; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ## 791 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask) 792 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 793 ret <8 x i16> %vec7 794} 795 796declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone 797 798define <8 x i8> @test_cmp_q_512(<8 x i64> 
%a0, <8 x i64> %a1) { 799; CHECK_LABEL: test_cmp_q_512 800; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ## 801 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) 802 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 803; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ## 804 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1) 805 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 806; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ## 807 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1) 808 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 809; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ## 810 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1) 811 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 812; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ## 813 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1) 814 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 815; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ## 816 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1) 817 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 818; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ## 819 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1) 820 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 821; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ## 822 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1) 823 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 824 ret <8 x i8> %vec7 825} 826 827define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 828; CHECK_LABEL: test_mask_cmp_q_512 829; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## 830 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) 831 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 832; CHECK: 
vpcmpltq %zmm1, %zmm0, %k0 {%k1} ## 833 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask) 834 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 835; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ## 836 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask) 837 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 838; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ## 839 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask) 840 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 841; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ## 842 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask) 843 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 844; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ## 845 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask) 846 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 847; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ## 848 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask) 849 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 850; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ## 851 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask) 852 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 853 ret <8 x i8> %vec7 854} 855 856declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone 857 858define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { 859; CHECK_LABEL: test_ucmp_q_512 860; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ## 861 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) 862 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 863; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ## 864 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 
-1) 865 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 866; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ## 867 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1) 868 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 869; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ## 870 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1) 871 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 872; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ## 873 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1) 874 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 875; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ## 876 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1) 877 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 878; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ## 879 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1) 880 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 881; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ## 882 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1) 883 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 884 ret <8 x i8> %vec7 885} 886 887define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 888; CHECK_LABEL: test_mask_ucmp_q_512 889; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ## 890 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) 891 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 892; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ## 893 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask) 894 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 895; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ## 896 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask) 897 %vec2 = insertelement 
<8 x i8> %vec1, i8 %res2, i32 2 898; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ## 899 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask) 900 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 901; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ## 902 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask) 903 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 904; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ## 905 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask) 906 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 907; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ## 908 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask) 909 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 910; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ## 911 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask) 912 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 913 ret <8 x i8> %vec7 914} 915 916declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone 917 918define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) { 919; CHECK-LABEL: test_mask_vextractf32x4: 920; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1} 921 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask) 922 ret <4 x float> %res 923} 924 925declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8) 926 927define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) { 928; CHECK-LABEL: test_mask_vextracti64x4: 929; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1} 930 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask) 931 ret <4 x i64> %res 932} 933 934declare <4 x i64> 
@llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8) 935 936define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) { 937; CHECK-LABEL: test_maskz_vextracti32x4: 938; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} 939 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask) 940 ret <4 x i32> %res 941} 942 943declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8) 944 945define <4 x double> @test_vextractf64x4(<8 x double> %a) { 946; CHECK-LABEL: test_vextractf64x4: 947; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ## 948 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1) 949 ret <4 x double> %res 950} 951 952declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8) 953 954define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) { 955 ; CHECK-LABEL: test_x86_avx512_pslli_d 956 ; CHECK: vpslld 957 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) 958 ret <16 x i32> %res 959} 960 961define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 962 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d 963 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1} 964 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) 965 ret <16 x i32> %res 966} 967 968define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) { 969 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d 970 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z} 971 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) 972 ret <16 x i32> %res 973} 974 975declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone 976 977define <8 x i64> @test_x86_avx512_pslli_q(<8 x 
i64> %a0) { 978 ; CHECK-LABEL: test_x86_avx512_pslli_q 979 ; CHECK: vpsllq 980 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) 981 ret <8 x i64> %res 982} 983 984define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 985 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q 986 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1} 987 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) 988 ret <8 x i64> %res 989} 990 991define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) { 992 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q 993 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z} 994 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) 995 ret <8 x i64> %res 996} 997 998declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone 999 1000define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) { 1001 ; CHECK-LABEL: test_x86_avx512_psrli_d 1002 ; CHECK: vpsrld 1003 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) 1004 ret <16 x i32> %res 1005} 1006 1007define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1008 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d 1009 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1} 1010 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) 1011 ret <16 x i32> %res 1012} 1013 1014define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) { 1015 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d 1016 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z} 1017 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) 1018 ret <16 x i32> %res 1019} 1020 1021declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, 
i16) nounwind readnone 1022 1023define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) { 1024 ; CHECK-LABEL: test_x86_avx512_psrli_q 1025 ; CHECK: vpsrlq 1026 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) 1027 ret <8 x i64> %res 1028} 1029 1030define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1031 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q 1032 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1} 1033 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) 1034 ret <8 x i64> %res 1035} 1036 1037define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) { 1038 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q 1039 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z} 1040 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) 1041 ret <8 x i64> %res 1042} 1043 1044declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone 1045 1046define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) { 1047 ; CHECK-LABEL: test_x86_avx512_psrai_d 1048 ; CHECK: vpsrad 1049 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) 1050 ret <16 x i32> %res 1051} 1052 1053define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1054 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d 1055 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1} 1056 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) 1057 ret <16 x i32> %res 1058} 1059 1060define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) { 1061 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d 1062 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z} 1063 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) 1064 ret <16 x i32> 
%res 1065} 1066 1067declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone 1068 1069define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) { 1070 ; CHECK-LABEL: test_x86_avx512_psrai_q 1071 ; CHECK: vpsraq 1072 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) 1073 ret <8 x i64> %res 1074} 1075 1076define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1077 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q 1078 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1} 1079 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) 1080 ret <8 x i64> %res 1081} 1082 1083define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) { 1084 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q 1085 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z} 1086 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) 1087 ret <8 x i64> %res 1088} 1089 1090declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone 1091 1092define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) { 1093 ; CHECK-LABEL: test_x86_avx512_psll_d 1094 ; CHECK: vpslld 1095 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1096 ret <16 x i32> %res 1097} 1098 1099define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1100 ; CHECK-LABEL: test_x86_avx512_mask_psll_d 1101 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1} 1102 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) 1103 ret <16 x i32> %res 1104} 1105 1106define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1107 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d 1108 ; CHECK: vpslld %xmm1, 
%zmm0, %zmm0 {%k1} {z} 1109 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1110 ret <16 x i32> %res 1111} 1112 1113declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone 1114 1115define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) { 1116 ; CHECK-LABEL: test_x86_avx512_psll_q 1117 ; CHECK: vpsllq 1118 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1119 ret <8 x i64> %res 1120} 1121 1122define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1123 ; CHECK-LABEL: test_x86_avx512_mask_psll_q 1124 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1} 1125 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1126 ret <8 x i64> %res 1127} 1128 1129define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1130 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q 1131 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} 1132 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1133 ret <8 x i64> %res 1134} 1135 1136declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1137 1138define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) { 1139 ; CHECK-LABEL: test_x86_avx512_psrl_d 1140 ; CHECK: vpsrld 1141 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1142 ret <16 x i32> %res 1143} 1144 1145define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1146 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d 1147 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1} 1148 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> 
%a1, <16 x i32> %a2, i16 %mask) 1149 ret <16 x i32> %res 1150} 1151 1152define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1153 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d 1154 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} 1155 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1156 ret <16 x i32> %res 1157} 1158 1159declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone 1160 1161define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) { 1162 ; CHECK-LABEL: test_x86_avx512_psrl_q 1163 ; CHECK: vpsrlq 1164 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1165 ret <8 x i64> %res 1166} 1167 1168define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1169 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q 1170 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1} 1171 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1172 ret <8 x i64> %res 1173} 1174 1175define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1176 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q 1177 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} 1178 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1179 ret <8 x i64> %res 1180} 1181 1182declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1183 1184define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) { 1185 ; CHECK-LABEL: test_x86_avx512_psra_d 1186 ; CHECK: vpsrad 1187 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1188 ret <16 x i32> %res 1189} 1190 1191define <16 x i32> 
@test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1192 ; CHECK-LABEL: test_x86_avx512_mask_psra_d 1193 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1} 1194 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) 1195 ret <16 x i32> %res 1196} 1197 1198define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1199 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d 1200 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} 1201 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1202 ret <16 x i32> %res 1203} 1204 1205declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone 1206 1207define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) { 1208 ; CHECK-LABEL: test_x86_avx512_psra_q 1209 ; CHECK: vpsraq 1210 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1211 ret <8 x i64> %res 1212} 1213 1214define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1215 ; CHECK-LABEL: test_x86_avx512_mask_psra_q 1216 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1} 1217 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1218 ret <8 x i64> %res 1219} 1220 1221define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1222 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q 1223 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} 1224 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1225 ret <8 x i64> %res 1226} 1227 1228declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1229 1230define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) { 
1231 ; CHECK-LABEL: test_x86_avx512_psllv_d 1232 ; CHECK: vpsllvd 1233 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1234 ret <16 x i32> %res 1235} 1236 1237define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1238 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d 1239 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1} 1240 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) 1241 ret <16 x i32> %res 1242} 1243 1244define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1245 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d 1246 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} 1247 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1248 ret <16 x i32> %res 1249} 1250 1251declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone 1252 1253define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) { 1254 ; CHECK-LABEL: test_x86_avx512_psllv_q 1255 ; CHECK: vpsllvq 1256 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1257 ret <8 x i64> %res 1258} 1259 1260define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1261 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q 1262 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1} 1263 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 1264 ret <8 x i64> %res 1265} 1266 1267define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1268 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q 1269 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} 1270 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x 
i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1271 ret <8 x i64> %res 1272} 1273 1274declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone 1275 1276 1277define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) { 1278 ; CHECK-LABEL: test_x86_avx512_psrav_d 1279 ; CHECK: vpsravd 1280 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1281 ret <16 x i32> %res 1282} 1283 1284define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1285 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d 1286 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1} 1287 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) 1288 ret <16 x i32> %res 1289} 1290 1291define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1292 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d 1293 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} 1294 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1295 ret <16 x i32> %res 1296} 1297 1298declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone 1299 1300define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) { 1301 ; CHECK-LABEL: test_x86_avx512_psrav_q 1302 ; CHECK: vpsravq 1303 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1304 ret <8 x i64> %res 1305} 1306 1307define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1308 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q 1309 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1} 1310 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 1311 ret <8 x i64> %res 1312} 
1313 1314define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1315 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q 1316 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} 1317 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1318 ret <8 x i64> %res 1319} 1320 1321declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone 1322 1323define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) { 1324 ; CHECK-LABEL: test_x86_avx512_psrlv_d 1325 ; CHECK: vpsrlvd 1326 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1327 ret <16 x i32> %res 1328} 1329 1330define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1331 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d 1332 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} 1333 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) 1334 ret <16 x i32> %res 1335} 1336 1337define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1338 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d 1339 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} 1340 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1341 ret <16 x i32> %res 1342} 1343 1344declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone 1345 1346define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) { 1347 ; CHECK-LABEL: test_x86_avx512_psrlv_q 1348 ; CHECK: vpsrlvq 1349 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1350 ret <8 x i64> %res 1351} 1352 1353define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x 
i64> %a1, <8 x i64> %a2, i8 %mask) { 1354 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q 1355 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} 1356 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 1357 ret <8 x i64> %res 1358} 1359 1360define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1361 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q 1362 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} 1363 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1364 ret <8 x i64> %res 1365} 1366 1367declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone 1368 1369define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) { 1370 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop 1371 ; CHECK: vpsrlvq (% 1372 %b = load <8 x i64>* %ptr 1373 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1374 ret <8 x i64> %res 1375} 1376