; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=ALL,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefixes=ALL,AVX512VBMI

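; Only bytes 2-3 of the result are defined, so the shuffle reduces to a
; single 32-bit logical right shift of the low 128 bits on all subtargets.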
define <64 x i8> @shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a) {
; ALL-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrld $16, %xmm0, %xmm0
; ALL-NEXT:    retq
  %b = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <64 x i8> %b
}

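; Zero-inserting per-128-bit-lane byte shift left: a single 512-bit VPSLLDQ
; with AVX512BW/VBMI, otherwise split into two 256-bit shifts.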
define <64 x i8> @shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpslldq {{.*#+}} ymm1 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpslldq {{.*#+}} zmm0 = zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpslldq {{.*#+}} ymm1 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_zz_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_zz_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_zz_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_zz_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpslldq {{.*#+}} zmm0 = zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 79, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 95, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 111, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 127, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
  ret <64 x i8> %shuffle
}

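; The matching per-lane byte shift right with zero insertion (VPSRLDQ).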
define <64 x i8> @shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrldq {{.*#+}} ymm1 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrldq {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zmm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zmm0[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zmm0[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zero,zero
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrldq {{.*#+}} ymm1 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_02_03_04_05_06_07_08_09_10_11_12_13_14_15_zz_zz_18_19_20_21_22_23_24_25_26_27_28_29_30_31_zz_zz_34_35_36_37_38_39_40_41_42_43_44_45_46_47_zz_zz_50_51_52_53_54_55_56_57_58_59_60_61_62_63_zz_zz:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrldq {{.*#+}} zmm0 = zmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zmm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zmm0[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zmm0[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zero,zero
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 64, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 64, i32 64, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 64, i32 64, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 64>
  ret <64 x i8> %shuffle
}

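; Two-source per-lane byte alignment: one 512-bit VPALIGNR with AVX512BW/VBMI,
; otherwise two 256-bit VPALIGNRs on the extracted halves.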
define <64 x i8> @shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpalignr {{.*#+}} ymm2 = ymm2[15],ymm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm2[31],ymm3[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm2 = ymm2[15],ymm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm2[31],ymm3[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_79_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_95_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_111_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_127_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> <i32 79, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 95, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 111, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 127, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
  ret <64 x i8> %shuffle
}

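; Keeping only byte 0 and zeroing the rest is matched as a bitwise AND with a
; constant mask rather than as a shuffle.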
define <64 x i8> @shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,0,0,0]
; AVX512F-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,0,0,0]
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps {{.*#+}} xmm1 = [255,0,0,0]
; AVX512DQ-NEXT:    vandps %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,0,0,0]
; AVX512VBMI-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 0, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
  ret <64 x i8> %shuffle
}

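; Splat of byte 0: VPBROADCASTB to 512 bits with AVX512BW/VBMI, or a 256-bit
; broadcast plus VINSERTI64X4 without it.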
define <64 x i8> @shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %xmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpbroadcastb %xmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <64 x i8> %shuffle
}

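; Full byte reversal: AVX512VBMI lowers this to a single VPERMB; the other
; subtargets need per-lane VPSHUFBs plus a lane permute.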
define <64 x i8> @shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; AVX512F-NEXT:    vpshufb %ymm1, %ymm0, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48]
; AVX512BW-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1]
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_63_62_61_60_59_58_57_56_55_54_53_52_51_50_49_48_47_46_45_44_43_42_41_40_39_38_37_36_35_34_33_32_31_30_29_28_27_26_25_24_23_22_21_20_19_18_17_16_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; AVX512VBMI-NEXT:    vpermb %zmm0, %zmm1, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <64 x i8> %shuffle
}

; PR44379
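; Rotating the bytes within each 64-bit element lowers to VPROLQ.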
define <64 x i8> @shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57(<64 x i8> %a) {
; ALL-LABEL: shuffle_v64i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25_34_35_36_37_38_39_32_33_42_43_44_45_46_47_40_41_50_51_52_53_54_55_48_49_58_59_60_61_62_63_56_57:
; ALL:       # %bb.0:
; ALL-NEXT:    vprolq $48, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 24, i32 25, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 32, i32 33, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 40, i32 41, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 48, i32 49, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 56, i32 57>
  ret <64 x i8> %shuffle
}

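; Broadcasting the low byte of a loaded i32 folds into VPBROADCASTB from memory.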
define <64 x i8> @insert_dup_mem_v64i8_i32(i32* %ptr) {
; AVX512F-LABEL: insert_dup_mem_v64i8_i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: insert_dup_mem_v64i8_i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb (%rdi), %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: insert_dup_mem_v64i8_i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: insert_dup_mem_v64i8_i32:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpbroadcastb (%rdi), %zmm0
; AVX512VBMI-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <64 x i32> zeroinitializer
  ret <64 x i8> %tmp3
}

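; The sign extension does not affect byte 0, so this is still a plain byte
; broadcast from memory.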
define <64 x i8> @insert_dup_mem_v64i8_sext_i8(i8* %ptr) {
; AVX512F-LABEL: insert_dup_mem_v64i8_sext_i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: insert_dup_mem_v64i8_sext_i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb (%rdi), %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: insert_dup_mem_v64i8_sext_i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: insert_dup_mem_v64i8_sext_i8:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpbroadcastb (%rdi), %zmm0
; AVX512VBMI-NEXT:    retq
  %tmp = load i8, i8* %ptr, align 1
  %tmp1 = sext i8 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <64 x i32> zeroinitializer
  ret <64 x i8> %tmp4
}

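; Broadcasting byte 1 of the loaded i32 folds to a broadcast from offset 1.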
define <64 x i8> @insert_dup_elt1_mem_v64i8_i32(i32* %ptr) {
; AVX512F-LABEL: insert_dup_elt1_mem_v64i8_i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: insert_dup_elt1_mem_v64i8_i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb 1(%rdi), %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: insert_dup_elt1_mem_v64i8_i32:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpbroadcastb 1(%rdi), %zmm0
; AVX512VBMI-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <64 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <64 x i8> %tmp3
}

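; Likewise for byte 3, broadcast from offset 3.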
define <64 x i8> @insert_dup_elt3_mem_v64i8_i32(i32* %ptr) {
; AVX512F-LABEL: insert_dup_elt3_mem_v64i8_i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastb 3(%rdi), %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: insert_dup_elt3_mem_v64i8_i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb 3(%rdi), %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: insert_dup_elt3_mem_v64i8_i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpbroadcastb 3(%rdi), %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: insert_dup_elt3_mem_v64i8_i32:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpbroadcastb 3(%rdi), %zmm0
; AVX512VBMI-NEXT:    retq
  %tmp = load i32, i32* %ptr, align 4
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
  %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <64 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <64 x i8> %tmp3
}

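; Byte 1 of the sign-extended value is not a bare memory byte, so the scalar
; is extended and shifted before being broadcast.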
define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) {
; AVX512F-LABEL: insert_dup_elt1_mem_v64i8_sext_i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movsbl (%rdi), %eax
; AVX512F-NEXT:    shrl $8, %eax
; AVX512F-NEXT:    vmovd %eax, %xmm0
; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: insert_dup_elt1_mem_v64i8_sext_i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    movsbl (%rdi), %eax
; AVX512BW-NEXT:    shrl $8, %eax
; AVX512BW-NEXT:    vpbroadcastb %eax, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_sext_i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    movsbl (%rdi), %eax
; AVX512DQ-NEXT:    shrl $8, %eax
; AVX512DQ-NEXT:    vmovd %eax, %xmm0
; AVX512DQ-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: insert_dup_elt1_mem_v64i8_sext_i8:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    movsbl (%rdi), %eax
; AVX512VBMI-NEXT:    shrl $8, %eax
; AVX512VBMI-NEXT:    vpbroadcastb %eax, %zmm0
; AVX512VBMI-NEXT:    retq
  %tmp = load i8, i8* %ptr, align 1
  %tmp1 = sext i8 %tmp to i32
  %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
  %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
  %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <64 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <64 x i8> %tmp4
}

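; Zero extension of bytes to 64-bit elements is matched as VPMOVZXBQ.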
define <64 x i8> @shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX512F-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_64_zz_zz_zz_zz_zz_zz_zz_65_zz_zz_zz_zz_zz_zz_zz_66_zz_zz_zz_zz_zz_zz_zz_67_zz_zz_zz_zz_zz_zz_zz_68_zz_zz_zz_zz_zz_zz_zz_69_zz_zz_zz_zz_zz_zz_zz_70_zz_zz_zz_zz_zz_zz_zz_71_zz_zz_zz_zz_zz_zz_zz:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 65, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 66, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 67, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 68, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 69, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 70, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 71, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <64 x i8> %shuffle
}

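; Zero extension of bytes to 32-bit elements is matched as VPMOVZXBD.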
define <64 x i8> @shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512F-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_64_zz_zz_zz_65_zz_zz_zz_66_zz_zz_zz_67_zz_zz_zz_68_zz_zz_zz_69_zz_zz_zz_70_zz_zz_zz_71_zz_zz_zz_72_zz_zz_zz_73_zz_zz_zz_74_zz_zz_zz_75_zz_zz_zz_76_zz_zz_zz_77_zz_zz_zz_78_zz_zz_zz_79_zz_zz_zz:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 0, i32 0, i32 65, i32 0, i32 0, i32 0, i32 66, i32 0, i32 0, i32 0, i32 67, i32 0, i32 0, i32 0, i32 68, i32 0, i32 0, i32 0, i32 69, i32 0, i32 0, i32 0, i32 70, i32 0, i32 0, i32 0, i32 71, i32 0, i32 0, i32 0, i32 72, i32 0, i32 0, i32 0, i32 73, i32 0, i32 0, i32 0, i32 74, i32 0, i32 0, i32 0, i32 75, i32 0, i32 0, i32 0, i32 76, i32 0, i32 0, i32 0, i32 77, i32 0, i32 0, i32 0, i32 78, i32 0, i32 0, i32 0, i32 79, i32 0, i32 0, i32 0>
  ret <64 x i8> %shuffle
}

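; Zero extension of bytes to 16-bit elements is matched as VPMOVZXBW.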
define <64 x i8> @shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_64_zz_65_zz_66_zz_67_zz_68_zz_69_zz_70_zz_71_zz_72_zz_73_zz_74_zz_75_zz_76_zz_77_zz_78_zz_79_zz_80_zz_81_zz_82_zz_83_zz_84_zz_85_zz_86_zz_87_zz_88_zz_89_zz_90_zz_91_zz_92_zz_93_zz_94_zz_95_zz:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> zeroinitializer, <64 x i8> %a, <64 x i32> <i32 64, i32 0, i32 65, i32 0, i32 66, i32 0, i32 67, i32 0, i32 68, i32 0, i32 69, i32 0, i32 70, i32 0, i32 71, i32 0, i32 72, i32 0, i32 73, i32 0, i32 74, i32 0, i32 75, i32 0, i32 76, i32 0, i32 77, i32 0, i32 78, i32 0, i32 79, i32 0, i32 80, i32 0, i32 81, i32 0, i32 82, i32 0, i32 83, i32 0, i32 84, i32 0, i32 85, i32 0, i32 86, i32 0, i32 87, i32 0, i32 88, i32 0, i32 89, i32 0, i32 90, i32 0, i32 91, i32 0, i32 92, i32 0, i32 93, i32 0, i32 94, i32 0, i32 95, i32 0>
  ret <64 x i8> %shuffle
}

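; Reversed odd bytes interleaved with zeros: AVX512VBMI uses VPERMT2B with a
; zero vector; the others shuffle within lanes after swapping 128-bit halves.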
define <64 x i8> @shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz(<64 x i8> %a) {
; AVX512F-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128,15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128]
; AVX512F-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512F-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1]
; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zmm0[15],zero,zmm0[13],zero,zmm0[11],zero,zmm0[9],zero,zmm0[7],zero,zmm0[5],zero,zmm0[3],zero,zmm0[1],zero,zmm0[31],zero,zmm0[29],zero,zmm0[27],zero,zmm0[25],zero,zmm0[23],zero,zmm0[21],zero,zmm0[19],zero,zmm0[17],zero,zmm0[47],zero,zmm0[45],zero,zmm0[43],zero,zmm0[41],zero,zmm0[39],zero,zmm0[37],zero,zmm0[35],zero,zmm0[33],zero,zmm0[63],zero,zmm0[61],zero,zmm0[59],zero,zmm0[57],zero,zmm0[55],zero,zmm0[53],zero,zmm0[51],zero,zmm0[49],zero
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128,15,128,13,128,11,128,9,128,7,128,5,128,3,128,1,128]
; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_63_zz_61_zz_59_zz_57_zz_55_zz_53_zz_51_zz_49_zz_47_zz_45_zz_43_zz_41_zz_39_zz_37_zz_35_zz_33_zz_31_zz_29_zz_27_zz_25_zz_23_zz_21_zz_19_zz_17_zz_15_zz_13_zz_11_zz_9_zz_7_zz_5_zz_3_zz_1_zz:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [63,65,61,67,59,69,57,71,55,73,53,75,51,77,49,79,47,81,45,83,43,85,41,87,39,89,37,91,35,93,33,95,31,97,29,99,27,101,25,103,23,105,21,107,19,109,17,111,15,113,13,115,11,117,9,119,7,121,5,123,3,125,1,127]
; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 63, i32 64, i32 61, i32 64, i32 59, i32 64, i32 57, i32 64, i32 55, i32 64, i32 53, i32 64, i32 51, i32 64, i32 49, i32 64, i32 47, i32 64, i32 45, i32 64, i32 43, i32 64, i32 41, i32 64, i32 39, i32 64, i32 37, i32 64, i32 35, i32 64, i32 33, i32 64, i32 31, i32 64, i32 29, i32 64, i32 27, i32 64, i32 25, i32 64, i32 23, i32 64, i32 21, i32 64, i32 19, i32 64, i32 17, i32 64, i32 15, i32 64, i32 13, i32 64, i32 11, i32 64, i32 9, i32 64, i32 7, i32 64, i32 5, i32 64, i32 3, i32 64, i32 1, i32 64>
  ret <64 x i8> %shuffle
}

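; Two-source byte interleave in reverse order: a single VPERMT2B with
; AVX512VBMI, otherwise blends and byte shuffles on each 256-bit half.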
define <64 x i8> @shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1]
; AVX512F-NEXT:    vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT:    vpblendvb %ymm4, %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14]
; AVX512F-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512F-NEXT:    vpblendvb %ymm4, %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512BW-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5,2,3,0,1]
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512BW-NEXT:    vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512BW-NEXT:    vpblendvb %ymm4, %ymm2, %ymm3, %ymm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14]
; AVX512BW-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
; AVX512BW-NEXT:    vpblendvb %ymm4, %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1]
; AVX512DQ-NEXT:    vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14]
; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_47_80_45_82_43_84_41_86_39_88_37_90_35_92_33_94_31_96_29_98_27_100_25_102_23_104_21_106_19_108_17_110_15_112_13_114_11_116_9_118_7_120_5_122_3_124_1_126:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [63,64,61,66,59,68,57,70,55,72,53,74,51,76,49,78,47,80,45,82,43,84,41,86,39,88,37,90,35,92,33,94,31,96,29,98,27,100,25,102,23,104,21,106,19,108,17,110,15,112,13,114,11,116,9,118,7,120,5,122,3,124,1,126]
; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
; AVX512VBMI-NEXT:    retq
  %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32> <i32 63, i32 64, i32 61, i32 66, i32 59, i32 68, i32 57, i32 70, i32 55, i32 72, i32 53, i32 74, i32 51, i32 76, i32 49, i32 78, i32 47, i32 80, i32 45, i32 82, i32 43, i32 84, i32 41, i32 86, i32 39, i32 88, i32 37, i32 90, i32 35, i32 92, i32 33, i32 94, i32 31, i32 96, i32 29, i32 98, i32 27, i32 100, i32 25, i32 102, i32 23, i32 104, i32 21, i32 106, i32 19, i32 108, i32 17, i32 110, i32 15, i32 112, i32 13, i32 114, i32 11, i32 116, i32 9, i32 118, i32 7, i32 120, i32 5, i32 122, i32 3, i32 124, i32 1, i32 126>
  ret <64 x i8> %shuffle
}

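; Arithmetic-shift results packed with signed saturation are matched as
; VPACKSSDW, 512 bits wide with AVX512BW/VBMI.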
define <64 x i8> @shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512F-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512F-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpackssdw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512BW-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpackssdw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512VBMI-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %3 = bitcast <16 x i32> %1 to <64 x i8>
  %4 = bitcast <16 x i32> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32  0, i32  1, i32  4, i32  5, i32  8, i32  9, i32 12, i32 13, i32  64, i32  65, i32  68, i32  69, i32  72, i32  73, i32  76, i32  77, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29, i32  80, i32  81, i32  84, i32  85, i32  88, i32  89, i32  92, i32  93, i32 32, i32 33, i32 36, i32 37, i32 40, i32 41, i32 44, i32 45, i32  96, i32  97, i32 100, i32 101, i32 104, i32 105, i32 108, i32 109, i32 48, i32 49, i32 52, i32 53, i32 56, i32 57, i32 60, i32 61, i32 112, i32 113, i32 116, i32 117, i32 120, i32 121, i32 124, i32 125>
  ret <64 x i8> %5
}

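; The same pattern narrowed twice: VPACKSSDW followed by VPACKSSWB.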
define <64 x i8> @shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512F-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512F-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512F-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT:    vpacksswb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpacksswb %ymm0, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512BW-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpacksswb %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpacksswb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpacksswb %ymm0, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrad $25, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpsrad $25, %zmm1, %zmm1
; AVX512VBMI-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpacksswb %zmm0, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %3 = bitcast <16 x i32> %1 to <64 x i8>
  %4 = bitcast <16 x i32> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32  0, i32  4, i32  8, i32 12, i32  64, i32  68, i32  72, i32  76, i32  0, i32  4, i32  8, i32 12, i32  64, i32  68, i32  72, i32  76, i32 16, i32 20, i32 24, i32 28, i32  80, i32  84, i32  88, i32  92, i32 16, i32 20, i32 24, i32 28, i32  80, i32  84, i32  88, i32  92, i32 32, i32 36, i32 40, i32 44, i32  96, i32 100, i32 104, i32 108, i32 32, i32 36, i32 40, i32 44, i32  96, i32 100, i32 104, i32 108, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124>
  ret <64 x i8> %5
}

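; Logical-shift results packed with unsigned saturation are matched as
; VPACKUSDW.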
define <64 x i8> @shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512F-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512F-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpackusdw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512BW-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpackusdw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512VBMI-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %3 = bitcast <16 x i32> %1 to <64 x i8>
  %4 = bitcast <16 x i32> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32  0, i32  1, i32  4, i32  5, i32  8, i32  9, i32 12, i32 13, i32  64, i32  65, i32  68, i32  69, i32  72, i32  73, i32  76, i32  77, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29, i32  80, i32  81, i32  84, i32  85, i32  88, i32  89, i32  92, i32  93, i32 32, i32 33, i32 36, i32 37, i32 40, i32 41, i32 44, i32 45, i32  96, i32  97, i32 100, i32 101, i32 104, i32 105, i32 108, i32 109, i32 48, i32 49, i32 52, i32 53, i32 56, i32 57, i32 60, i32 61, i32 112, i32 113, i32 116, i32 117, i32 120, i32 121, i32 124, i32 125>
  ret <64 x i8> %5
}

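; Selecting every 4th byte after the same shift folds to VPACKUSDW followed
; by VPACKUSWB (dword -> word -> byte, both unsigned-saturating); again only
; AVX512BW/AVX512VBMI can use the 512-bit forms of the packs.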
define <64 x i8> @shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; AVX512F-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512F-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512F-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT:    vpackuswb %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpackuswb %ymm0, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512BW-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpackuswb %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackuswb %ymm0, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrld $25, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpsrld $25, %zmm1, %zmm1
; AVX512VBMI-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpackuswb %zmm0, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
  %3 = bitcast <16 x i32> %1 to <64 x i8>
  %4 = bitcast <16 x i32> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32  0, i32  4, i32  8, i32 12, i32  64, i32  68, i32  72, i32  76, i32  0, i32  4, i32  8, i32 12, i32  64, i32  68, i32  72, i32  76, i32 16, i32 20, i32 24, i32 28, i32  80, i32  84, i32  88, i32  92, i32 16, i32 20, i32 24, i32 28, i32  80, i32  84, i32  88, i32  92, i32 32, i32 36, i32 40, i32 44, i32  96, i32 100, i32 104, i32 108, i32 32, i32 36, i32 40, i32 44, i32  96, i32 100, i32 104, i32 108, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124>
  ret <64 x i8> %5
}

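; A logical shift right by 8 followed by taking the low byte of every i16
; selects the original high bytes. The fully sequential result order does not
; match VPACKUSWB's per-lane interleave, so AVX512F/AVX512DQ need a fixup
; VPERMQ and AVX512BW needs extra lane shuffles; AVX512VBMI folds the whole
; pattern into a single VPERMT2B over the odd byte indices.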
define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) {
; AVX512F-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7]
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm0[2,3,6,7],zmm1[2,3,6,7]
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm2, %ymm3
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm4 = <u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14>
; AVX512BW-NEXT:    vpshufb %ymm4, %ymm3, %ymm3
; AVX512BW-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[0,1,4,5]
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT:    vpshufb %ymm5, %ymm1, %ymm1
; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3],ymm1[4,5],ymm3[6,7]
; AVX512BW-NEXT:    vpshufb %ymm4, %ymm2, %ymm2
; AVX512BW-NEXT:    vpshufb %ymm5, %ymm0, %ymm0
; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3],ymm0[4,5],ymm2[6,7]
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7]
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63,65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127]
; AVX512VBMI-NEXT:    vpermt2b %zmm1, %zmm2, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %3 = bitcast <32 x i16> %1 to <64 x i8>
  %4 = bitcast <32 x i16> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
  ret <64 x i8> %5
}

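; The same shift-and-truncate with the result interleaved per 128-bit lane
; matches VPACKUSWB directly, so AVX512BW/AVX512VBMI emit one 512-bit pack.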
define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) {
; AVX512F-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlw $8, %ymm0, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm3
; AVX512F-NEXT:    vpackuswb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512F-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpackuswb %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512VBMI-NEXT:    vpsrlw $8, %zmm1, %zmm1
; AVX512VBMI-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
; AVX512VBMI-NEXT:    retq
  %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %3 = bitcast <32 x i16> %1 to <64 x i8>
  %4 = bitcast <32 x i16> %2 to <64 x i8>
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
  ret <64 x i8> %5
}
