; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64
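; Check that funnel-shift patterns built from shl/lshr/or, with per-lane shift
; amounts summing to the element width, lower to the AVX512VBMI2
; concatenated-shift instructions: VPSHLDV* for variable amounts and
; VPSHLD* $imm for uniform amounts.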
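; Per-lane amounts of 31 and 33 sum to 64, so this matches a qword funnel
; shift; the non-uniform amounts require the variable form VPSHLDVQ with the
; amount vector loaded from the constant pool.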
define <8 x i64> @avx512_funnel_shift_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; X86-LABEL: avx512_funnel_shift_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; X64-NEXT:    retq
  %1 =  shl <8 x i64> %a0, <i64 31, i64 33, i64 31, i64 33, i64 31, i64 33, i64 31, i64 33>
  %2 = lshr <8 x i64> %a1, <i64 33, i64 31, i64 33, i64 31, i64 33, i64 31, i64 33, i64 31>
  %3 = or <8 x i64> %1, %2
  ret <8 x i64> %3
}
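; With a uniform amount (31 + 33 = 64 in every lane) the immediate form
; VPSHLDQ $31 is selected instead of the variable instruction.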
define <8 x i64> @avx512_funnel_shift_q_512_splat(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: avx512_funnel_shift_q_512_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 =  shl <8 x i64> %a0, <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>
  %2 = lshr <8 x i64> %a1, <i64 33, i64 33, i64 33, i64 33, i64 33, i64 33, i64 33, i64 33>
  %3 = or <8 x i64> %1, %2
  ret <8 x i64> %3
}
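; Same pattern at dword granularity: alternating 15/17 amounts sum to 32,
; selecting VPSHLDVD with a constant-pool amount vector.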
define <16 x i32> @avx512_funnel_shift_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; X86-LABEL: avx512_funnel_shift_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; X64-NEXT:    retq
  %1 =  shl <16 x i32> %a0, <i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17>
  %2 = lshr <16 x i32> %a1, <i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15>
  %3 = or <16 x i32> %1, %2
  ret <16 x i32> %3
}
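; Uniform dword amounts (15 + 17 = 32) fold to the immediate form VPSHLDD $15.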
define <16 x i32> @avx512_funnel_shift_d_512_splat(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: avx512_funnel_shift_d_512_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 =  shl <16 x i32> %a0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <16 x i32> %a1, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %3 = or <16 x i32> %1, %2
  ret <16 x i32> %3
}
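; Word granularity: alternating 7/9 amounts sum to 16, selecting VPSHLDVW.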
define <32 x i16> @avx512_funnel_shift_w_512(<32 x i16> %a0, <32 x i16> %a1) {
; X86-LABEL: avx512_funnel_shift_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; X64-NEXT:    retq
  %1 =  shl <32 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
  %2 = lshr <32 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
  %3 = or <32 x i16> %1, %2
  ret <32 x i16> %3
}
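; Uniform word amounts (7 + 9 = 16) fold to the immediate form VPSHLDW $7.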
define <32 x i16> @avx512_funnel_shift_w_512_splat(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: avx512_funnel_shift_w_512_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 =  shl <32 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %2 = lshr <32 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %3 = or <32 x i16> %1, %2
  ret <32 x i16> %3
}
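
; A minimal sketch (assertions NOT autogenerated): the splat qword pattern
; above is equivalent to the generic @llvm.fshl intrinsic with a uniform
; amount, which should also select VPSHLDQ $31. Any CHECK lines for this test
; would need regenerating with update_llc_test_checks.py before being relied
; upon.
declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @avx512_fshl_q_512_splat(<8 x i64> %a0, <8 x i64> %a1) {
  %1 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %1
}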