; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vbmi2-builtins.c

define <8 x i64> @test_mm512_mask_compress_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_compress_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define void @test_mm512_mask_compressstoreu_epi16(i8* %__P, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, i16* %1, <32 x i1> %2)
  ret void
}

define void @test_mm512_mask_compressstoreu_epi8(i8* %__P, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rsi, %k1
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %0, i8* %__P, <64 x i1> %1)
  ret void
}

define <8 x i64> @test_mm512_mask_expand_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expand_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expandloadu_epi16(<8 x i64> %__S, i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> %0)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i16*
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %0, <32 x i1> %1, <32 x i16> zeroinitializer)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  %2 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %1, <64 x i8> %0)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i64 %__U to <64 x i1>
  %1 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %0, <64 x i8> zeroinitializer)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shldi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shldi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shrdi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shrdi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shldv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)