1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW 9; 10; Just two 32-bit runs to make sure we do reasonable things there. 11; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE2 12; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE41 13 14define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { 15; SSE2-LABEL: sext_16i8_to_8i16: 16; SSE2: # %bb.0: # %entry 17; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 18; SSE2-NEXT: psraw $8, %xmm0 19; SSE2-NEXT: retq 20; 21; SSSE3-LABEL: sext_16i8_to_8i16: 22; SSSE3: # %bb.0: # %entry 23; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 24; SSSE3-NEXT: psraw $8, %xmm0 25; SSSE3-NEXT: retq 26; 27; SSE41-LABEL: sext_16i8_to_8i16: 28; SSE41: # %bb.0: # %entry 29; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 30; SSE41-NEXT: retq 31; 32; AVX-LABEL: sext_16i8_to_8i16: 33; AVX: # %bb.0: # %entry 34; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 35; AVX-NEXT: retq 36; 37; X86-SSE2-LABEL: sext_16i8_to_8i16: 38; X86-SSE2: # %bb.0: # %entry 39; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 40; X86-SSE2-NEXT: psraw $8, %xmm0 41; X86-SSE2-NEXT: retl 42; 43; X86-SSE41-LABEL: sext_16i8_to_8i16: 44; X86-SSE41: # %bb.0: # %entry 45; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 46; X86-SSE41-NEXT: retl 47entry: 48 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 49 %C = sext <8 x i8> %B to <8 x i16> 50 ret <8 x i16> %C 51} 52 53define <16 x i16> @sext_16i8_to_16i16(<16 x i8> %A) nounwind uwtable readnone ssp { 54; SSE2-LABEL: sext_16i8_to_16i16: 55; SSE2: # %bb.0: # %entry 56; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 57; SSE2-NEXT: psraw $8, %xmm2 58; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 59; SSE2-NEXT: psraw $8, %xmm1 60; SSE2-NEXT: movdqa %xmm2, %xmm0 61; SSE2-NEXT: retq 62; 63; SSSE3-LABEL: sext_16i8_to_16i16: 64; SSSE3: # %bb.0: # %entry 65; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 66; SSSE3-NEXT: psraw $8, %xmm2 67; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 68; SSSE3-NEXT: psraw $8, %xmm1 69; SSSE3-NEXT: movdqa %xmm2, %xmm0 70; SSSE3-NEXT: retq 71; 72; SSE41-LABEL: sext_16i8_to_16i16: 73; SSE41: # %bb.0: # %entry 74; SSE41-NEXT: pmovsxbw %xmm0, %xmm2 75; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 76; SSE41-NEXT: pmovsxbw %xmm0, %xmm1 77; SSE41-NEXT: movdqa %xmm2, %xmm0 78; SSE41-NEXT: retq 79; 80; AVX1-LABEL: sext_16i8_to_16i16: 81; AVX1: # %bb.0: # %entry 82; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 83; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = 
xmm0[2,3,2,3] 84; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0 85; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 86; AVX1-NEXT: retq 87; 88; AVX2-LABEL: sext_16i8_to_16i16: 89; AVX2: # %bb.0: # %entry 90; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 91; AVX2-NEXT: retq 92; 93; AVX512-LABEL: sext_16i8_to_16i16: 94; AVX512: # %bb.0: # %entry 95; AVX512-NEXT: vpmovsxbw %xmm0, %ymm0 96; AVX512-NEXT: retq 97; 98; X86-SSE2-LABEL: sext_16i8_to_16i16: 99; X86-SSE2: # %bb.0: # %entry 100; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 101; X86-SSE2-NEXT: psraw $8, %xmm2 102; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 103; X86-SSE2-NEXT: psraw $8, %xmm1 104; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 105; X86-SSE2-NEXT: retl 106; 107; X86-SSE41-LABEL: sext_16i8_to_16i16: 108; X86-SSE41: # %bb.0: # %entry 109; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm2 110; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 111; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm1 112; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 113; X86-SSE41-NEXT: retl 114entry: 115 %B = sext <16 x i8> %A to <16 x i16> 116 ret <16 x i16> %B 117} 118 119define <32 x i16> @sext_32i8_to_32i16(<32 x i8> %A) nounwind uwtable readnone ssp { 120; SSE2-LABEL: sext_32i8_to_32i16: 121; SSE2: # %bb.0: # %entry 122; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 123; SSE2-NEXT: psraw $8, %xmm4 124; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] 125; SSE2-NEXT: psraw $8, %xmm5 126; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 127; SSE2-NEXT: psraw $8, %xmm2 128; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] 129; SSE2-NEXT: psraw $8, %xmm3 130; SSE2-NEXT: movdqa %xmm4, %xmm0 131; SSE2-NEXT: movdqa %xmm5, %xmm1 132; SSE2-NEXT: retq 133; 134; SSSE3-LABEL: sext_32i8_to_32i16: 135; SSSE3: # %bb.0: # %entry 136; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 137; SSSE3-NEXT: psraw $8, %xmm4 138; SSSE3-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] 139; SSSE3-NEXT: psraw $8, %xmm5 140; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 141; SSSE3-NEXT: psraw $8, %xmm2 142; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] 143; SSSE3-NEXT: psraw $8, %xmm3 144; SSSE3-NEXT: movdqa %xmm4, %xmm0 145; SSSE3-NEXT: movdqa %xmm5, %xmm1 146; SSSE3-NEXT: retq 147; 148; SSE41-LABEL: sext_32i8_to_32i16: 149; SSE41: # %bb.0: # %entry 150; SSE41-NEXT: pmovsxbw %xmm0, %xmm5 151; SSE41-NEXT: pmovsxbw %xmm1, %xmm2 152; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 153; SSE41-NEXT: pmovsxbw %xmm0, %xmm4 154; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 155; SSE41-NEXT: pmovsxbw %xmm0, %xmm3 156; SSE41-NEXT: movdqa %xmm5, %xmm0 157; SSE41-NEXT: movdqa %xmm4, %xmm1 158; SSE41-NEXT: retq 159; 160; AVX1-LABEL: sext_32i8_to_32i16: 161; AVX1: # %bb.0: # %entry 162; AVX1-NEXT: 
vpmovsxbw %xmm0, %xmm1 163; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 164; AVX1-NEXT: vpmovsxbw %xmm2, %xmm2 165; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 166; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 167; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 168; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 169; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0 170; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 171; AVX1-NEXT: vmovaps %ymm2, %ymm0 172; AVX1-NEXT: retq 173; 174; AVX2-LABEL: sext_32i8_to_32i16: 175; AVX2: # %bb.0: # %entry 176; AVX2-NEXT: vpmovsxbw %xmm0, %ymm2 177; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 178; AVX2-NEXT: vpmovsxbw %xmm0, %ymm1 179; AVX2-NEXT: vmovdqa %ymm2, %ymm0 180; AVX2-NEXT: retq 181; 182; AVX512F-LABEL: sext_32i8_to_32i16: 183; AVX512F: # %bb.0: # %entry 184; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1 185; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 186; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0 187; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 188; AVX512F-NEXT: retq 189; 190; AVX512BW-LABEL: sext_32i8_to_32i16: 191; AVX512BW: # %bb.0: # %entry 192; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 193; AVX512BW-NEXT: retq 194; 195; X86-SSE2-LABEL: sext_32i8_to_32i16: 196; X86-SSE2: # %bb.0: # %entry 197; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 198; X86-SSE2-NEXT: psraw $8, %xmm4 199; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] 200; X86-SSE2-NEXT: psraw $8, %xmm5 201; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 202; X86-SSE2-NEXT: psraw $8, %xmm2 203; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = 
xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] 204; X86-SSE2-NEXT: psraw $8, %xmm3 205; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 206; X86-SSE2-NEXT: movdqa %xmm5, %xmm1 207; X86-SSE2-NEXT: retl 208; 209; X86-SSE41-LABEL: sext_32i8_to_32i16: 210; X86-SSE41: # %bb.0: # %entry 211; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm5 212; X86-SSE41-NEXT: pmovsxbw %xmm1, %xmm2 213; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 214; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm4 215; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 216; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm3 217; X86-SSE41-NEXT: movdqa %xmm5, %xmm0 218; X86-SSE41-NEXT: movdqa %xmm4, %xmm1 219; X86-SSE41-NEXT: retl 220entry: 221 %B = sext <32 x i8> %A to <32 x i16> 222 ret <32 x i16> %B 223} 224 225define <4 x i32> @sext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { 226; SSE2-LABEL: sext_16i8_to_4i32: 227; SSE2: # %bb.0: # %entry 228; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 229; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 230; SSE2-NEXT: psrad $24, %xmm0 231; SSE2-NEXT: retq 232; 233; SSSE3-LABEL: sext_16i8_to_4i32: 234; SSSE3: # %bb.0: # %entry 235; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 236; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 237; SSSE3-NEXT: psrad $24, %xmm0 238; SSSE3-NEXT: retq 239; 240; SSE41-LABEL: sext_16i8_to_4i32: 241; SSE41: # %bb.0: # %entry 242; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 243; SSE41-NEXT: retq 244; 245; AVX-LABEL: sext_16i8_to_4i32: 246; AVX: # %bb.0: # %entry 247; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 248; AVX-NEXT: retq 249; 250; X86-SSE2-LABEL: sext_16i8_to_4i32: 251; X86-SSE2: # %bb.0: # %entry 252; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 253; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 254; X86-SSE2-NEXT: psrad $24, %xmm0 255; 
X86-SSE2-NEXT: retl 256; 257; X86-SSE41-LABEL: sext_16i8_to_4i32: 258; X86-SSE41: # %bb.0: # %entry 259; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm0 260; X86-SSE41-NEXT: retl 261entry: 262 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 263 %C = sext <4 x i8> %B to <4 x i32> 264 ret <4 x i32> %C 265} 266 267define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { 268; SSE2-LABEL: sext_16i8_to_8i32: 269; SSE2: # %bb.0: # %entry 270; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 271; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 272; SSE2-NEXT: psrad $24, %xmm0 273; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 274; SSE2-NEXT: psrad $24, %xmm1 275; SSE2-NEXT: retq 276; 277; SSSE3-LABEL: sext_16i8_to_8i32: 278; SSSE3: # %bb.0: # %entry 279; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 280; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 281; SSSE3-NEXT: psrad $24, %xmm0 282; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 283; SSSE3-NEXT: psrad $24, %xmm1 284; SSSE3-NEXT: retq 285; 286; SSE41-LABEL: sext_16i8_to_8i32: 287; SSE41: # %bb.0: # %entry 288; SSE41-NEXT: pmovsxbd %xmm0, %xmm2 289; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 290; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 291; SSE41-NEXT: movdqa %xmm2, %xmm0 292; SSE41-NEXT: retq 293; 294; AVX1-LABEL: sext_16i8_to_8i32: 295; AVX1: # %bb.0: # %entry 296; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 297; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 298; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 299; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 300; AVX1-NEXT: retq 301; 302; AVX2-LABEL: 
sext_16i8_to_8i32: 303; AVX2: # %bb.0: # %entry 304; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 305; AVX2-NEXT: retq 306; 307; AVX512-LABEL: sext_16i8_to_8i32: 308; AVX512: # %bb.0: # %entry 309; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0 310; AVX512-NEXT: retq 311; 312; X86-SSE2-LABEL: sext_16i8_to_8i32: 313; X86-SSE2: # %bb.0: # %entry 314; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 315; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 316; X86-SSE2-NEXT: psrad $24, %xmm0 317; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 318; X86-SSE2-NEXT: psrad $24, %xmm1 319; X86-SSE2-NEXT: retl 320; 321; X86-SSE41-LABEL: sext_16i8_to_8i32: 322; X86-SSE41: # %bb.0: # %entry 323; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm2 324; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 325; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm1 326; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 327; X86-SSE41-NEXT: retl 328entry: 329 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 330 %C = sext <8 x i8> %B to <8 x i32> 331 ret <8 x i32> %C 332} 333 334define <16 x i32> @sext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp { 335; SSE2-LABEL: sext_16i8_to_16i32: 336; SSE2: # %bb.0: # %entry 337; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 338; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] 339; SSE2-NEXT: psrad $24, %xmm4 340; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 341; SSE2-NEXT: psrad $24, %xmm1 342; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 343; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 344; SSE2-NEXT: psrad $24, %xmm2 345; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 346; SSE2-NEXT: psrad $24, %xmm3 347; SSE2-NEXT: movdqa %xmm4, %xmm0 348; SSE2-NEXT: retq 349; 350; SSSE3-LABEL: sext_16i8_to_16i32: 351; SSSE3: # %bb.0: # %entry 352; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 353; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] 354; SSSE3-NEXT: psrad $24, %xmm4 355; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 356; SSSE3-NEXT: psrad $24, %xmm1 357; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 358; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 359; SSSE3-NEXT: psrad $24, %xmm2 360; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 361; SSSE3-NEXT: psrad $24, %xmm3 362; SSSE3-NEXT: movdqa %xmm4, %xmm0 363; SSSE3-NEXT: retq 364; 365; SSE41-LABEL: sext_16i8_to_16i32: 366; SSE41: # %bb.0: # %entry 367; SSE41-NEXT: pmovsxbd %xmm0, %xmm4 368; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 369; SSE41-NEXT: pmovsxbd %xmm1, %xmm1 370; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 371; SSE41-NEXT: pmovsxbd %xmm2, %xmm2 372; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 373; SSE41-NEXT: pmovsxbd %xmm0, %xmm3 374; SSE41-NEXT: movdqa %xmm4, %xmm0 375; SSE41-NEXT: retq 376; 377; AVX1-LABEL: sext_16i8_to_16i32: 378; AVX1: # %bb.0: # %entry 379; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 380; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 381; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2 382; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 383; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 384; AVX1-NEXT: vpmovsxbd 
%xmm1, %xmm1 385; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 386; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 387; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 388; AVX1-NEXT: vmovaps %ymm2, %ymm0 389; AVX1-NEXT: retq 390; 391; AVX2-LABEL: sext_16i8_to_16i32: 392; AVX2: # %bb.0: # %entry 393; AVX2-NEXT: vpmovsxbd %xmm0, %ymm2 394; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 395; AVX2-NEXT: vpmovsxbd %xmm0, %ymm1 396; AVX2-NEXT: vmovdqa %ymm2, %ymm0 397; AVX2-NEXT: retq 398; 399; AVX512-LABEL: sext_16i8_to_16i32: 400; AVX512: # %bb.0: # %entry 401; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 402; AVX512-NEXT: retq 403; 404; X86-SSE2-LABEL: sext_16i8_to_16i32: 405; X86-SSE2: # %bb.0: # %entry 406; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 407; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] 408; X86-SSE2-NEXT: psrad $24, %xmm4 409; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 410; X86-SSE2-NEXT: psrad $24, %xmm1 411; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 412; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 413; X86-SSE2-NEXT: psrad $24, %xmm2 414; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 415; X86-SSE2-NEXT: psrad $24, %xmm3 416; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 417; X86-SSE2-NEXT: retl 418; 419; X86-SSE41-LABEL: sext_16i8_to_16i32: 420; X86-SSE41: # %bb.0: # %entry 421; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm4 422; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 423; X86-SSE41-NEXT: pmovsxbd %xmm1, %xmm1 424; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 425; X86-SSE41-NEXT: pmovsxbd %xmm2, %xmm2 426; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 427; X86-SSE41-NEXT: pmovsxbd %xmm0, 
%xmm3 428; X86-SSE41-NEXT: movdqa %xmm4, %xmm0 429; X86-SSE41-NEXT: retl 430entry: 431 %B = sext <16 x i8> %A to <16 x i32> 432 ret <16 x i32> %B 433} 434 435define <2 x i64> @sext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { 436; SSE2-LABEL: sext_16i8_to_2i64: 437; SSE2: # %bb.0: # %entry 438; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 439; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 440; SSE2-NEXT: pxor %xmm1, %xmm1 441; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 442; SSE2-NEXT: psrad $24, %xmm0 443; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 444; SSE2-NEXT: retq 445; 446; SSSE3-LABEL: sext_16i8_to_2i64: 447; SSSE3: # %bb.0: # %entry 448; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 449; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 450; SSSE3-NEXT: pxor %xmm1, %xmm1 451; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 452; SSSE3-NEXT: psrad $24, %xmm0 453; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 454; SSSE3-NEXT: retq 455; 456; SSE41-LABEL: sext_16i8_to_2i64: 457; SSE41: # %bb.0: # %entry 458; SSE41-NEXT: pmovsxbq %xmm0, %xmm0 459; SSE41-NEXT: retq 460; 461; AVX-LABEL: sext_16i8_to_2i64: 462; AVX: # %bb.0: # %entry 463; AVX-NEXT: vpmovsxbq %xmm0, %xmm0 464; AVX-NEXT: retq 465; 466; X86-SSE2-LABEL: sext_16i8_to_2i64: 467; X86-SSE2: # %bb.0: # %entry 468; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 469; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 470; X86-SSE2-NEXT: pxor %xmm1, %xmm1 471; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 472; X86-SSE2-NEXT: psrad $24, %xmm0 473; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 474; X86-SSE2-NEXT: retl 475; 476; X86-SSE41-LABEL: sext_16i8_to_2i64: 477; X86-SSE41: # %bb.0: # %entry 478; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm0 479; X86-SSE41-NEXT: retl 480entry: 481 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x 
i32> <i32 0, i32 1> 482 %C = sext <2 x i8> %B to <2 x i64> 483 ret <2 x i64> %C 484} 485 486define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { 487; SSE2-LABEL: sext_16i8_to_4i64: 488; SSE2: # %bb.0: # %entry 489; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 490; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 491; SSE2-NEXT: psrad $24, %xmm1 492; SSE2-NEXT: pxor %xmm2, %xmm2 493; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 494; SSE2-NEXT: movdqa %xmm1, %xmm0 495; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 496; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 497; SSE2-NEXT: retq 498; 499; SSSE3-LABEL: sext_16i8_to_4i64: 500; SSSE3: # %bb.0: # %entry 501; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 502; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 503; SSSE3-NEXT: psrad $24, %xmm1 504; SSSE3-NEXT: pxor %xmm2, %xmm2 505; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 506; SSSE3-NEXT: movdqa %xmm1, %xmm0 507; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 508; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 509; SSSE3-NEXT: retq 510; 511; SSE41-LABEL: sext_16i8_to_4i64: 512; SSE41: # %bb.0: # %entry 513; SSE41-NEXT: pmovsxbq %xmm0, %xmm2 514; SSE41-NEXT: psrld $16, %xmm0 515; SSE41-NEXT: pmovsxbq %xmm0, %xmm1 516; SSE41-NEXT: movdqa %xmm2, %xmm0 517; SSE41-NEXT: retq 518; 519; AVX1-LABEL: sext_16i8_to_4i64: 520; AVX1: # %bb.0: # %entry 521; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 522; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 523; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 524; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 525; AVX1-NEXT: retq 526; 527; AVX2-LABEL: sext_16i8_to_4i64: 528; AVX2: # %bb.0: # %entry 529; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0 530; AVX2-NEXT: retq 531; 532; AVX512-LABEL: sext_16i8_to_4i64: 533; 
AVX512: # %bb.0: # %entry 534; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0 535; AVX512-NEXT: retq 536; 537; X86-SSE2-LABEL: sext_16i8_to_4i64: 538; X86-SSE2: # %bb.0: # %entry 539; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 540; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 541; X86-SSE2-NEXT: psrad $24, %xmm1 542; X86-SSE2-NEXT: pxor %xmm2, %xmm2 543; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 544; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 545; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 546; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 547; X86-SSE2-NEXT: retl 548; 549; X86-SSE41-LABEL: sext_16i8_to_4i64: 550; X86-SSE41: # %bb.0: # %entry 551; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm2 552; X86-SSE41-NEXT: psrld $16, %xmm0 553; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm1 554; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 555; X86-SSE41-NEXT: retl 556entry: 557 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 558 %C = sext <4 x i8> %B to <4 x i64> 559 ret <4 x i64> %C 560} 561 562define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp { 563; SSE2-LABEL: sext_16i8_to_8i64: 564; SSE2: # %bb.0: # %entry 565; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 566; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 567; SSE2-NEXT: psrad $24, %xmm1 568; SSE2-NEXT: pxor %xmm4, %xmm4 569; SSE2-NEXT: pxor %xmm3, %xmm3 570; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 571; SSE2-NEXT: movdqa %xmm1, %xmm0 572; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 573; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 574; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 
575; SSE2-NEXT: psrad $24, %xmm3 576; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 577; SSE2-NEXT: movdqa %xmm3, %xmm2 578; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 579; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 580; SSE2-NEXT: retq 581; 582; SSSE3-LABEL: sext_16i8_to_8i64: 583; SSSE3: # %bb.0: # %entry 584; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 585; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 586; SSSE3-NEXT: psrad $24, %xmm1 587; SSSE3-NEXT: pxor %xmm4, %xmm4 588; SSSE3-NEXT: pxor %xmm3, %xmm3 589; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 590; SSSE3-NEXT: movdqa %xmm1, %xmm0 591; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 592; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 593; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 594; SSSE3-NEXT: psrad $24, %xmm3 595; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 596; SSSE3-NEXT: movdqa %xmm3, %xmm2 597; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 598; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 599; SSSE3-NEXT: retq 600; 601; SSE41-LABEL: sext_16i8_to_8i64: 602; SSE41: # %bb.0: # %entry 603; SSE41-NEXT: pmovsxbq %xmm0, %xmm4 604; SSE41-NEXT: movdqa %xmm0, %xmm1 605; SSE41-NEXT: psrld $16, %xmm1 606; SSE41-NEXT: pmovsxbq %xmm1, %xmm1 607; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 608; SSE41-NEXT: pmovsxbq %xmm2, %xmm2 609; SSE41-NEXT: psrlq $48, %xmm0 610; SSE41-NEXT: pmovsxbq %xmm0, %xmm3 611; SSE41-NEXT: movdqa %xmm4, %xmm0 612; SSE41-NEXT: retq 613; 614; AVX1-LABEL: sext_16i8_to_8i64: 615; AVX1: # %bb.0: # %entry 616; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 617; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 618; AVX1-NEXT: vpmovsxbq %xmm2, %xmm2 619; AVX1-NEXT: 
vinsertf128 $1, %xmm2, %ymm1, %ymm2 620; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 621; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 622; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 623; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 624; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 625; AVX1-NEXT: vmovaps %ymm2, %ymm0 626; AVX1-NEXT: retq 627; 628; AVX2-LABEL: sext_16i8_to_8i64: 629; AVX2: # %bb.0: # %entry 630; AVX2-NEXT: vpmovsxbq %xmm0, %ymm2 631; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 632; AVX2-NEXT: vpmovsxbq %xmm0, %ymm1 633; AVX2-NEXT: vmovdqa %ymm2, %ymm0 634; AVX2-NEXT: retq 635; 636; AVX512-LABEL: sext_16i8_to_8i64: 637; AVX512: # %bb.0: # %entry 638; AVX512-NEXT: vpmovsxbq %xmm0, %zmm0 639; AVX512-NEXT: retq 640; 641; X86-SSE2-LABEL: sext_16i8_to_8i64: 642; X86-SSE2: # %bb.0: # %entry 643; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 644; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 645; X86-SSE2-NEXT: psrad $24, %xmm1 646; X86-SSE2-NEXT: pxor %xmm4, %xmm4 647; X86-SSE2-NEXT: pxor %xmm3, %xmm3 648; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 649; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 650; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 651; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 652; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 653; X86-SSE2-NEXT: psrad $24, %xmm3 654; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 655; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 656; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 657; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 658; X86-SSE2-NEXT: retl 659; 660; X86-SSE41-LABEL: sext_16i8_to_8i64: 661; X86-SSE41: # %bb.0: # %entry 662; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm4 663; X86-SSE41-NEXT: movdqa %xmm0, 
%xmm1 664; X86-SSE41-NEXT: psrld $16, %xmm1 665; X86-SSE41-NEXT: pmovsxbq %xmm1, %xmm1 666; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 667; X86-SSE41-NEXT: pmovsxbq %xmm2, %xmm2 668; X86-SSE41-NEXT: psrlq $48, %xmm0 669; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm3 670; X86-SSE41-NEXT: movdqa %xmm4, %xmm0 671; X86-SSE41-NEXT: retl 672entry: 673 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 674 %C = sext <8 x i8> %B to <8 x i64> 675 ret <8 x i64> %C 676} 677 678define <4 x i32> @sext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { 679; SSE2-LABEL: sext_8i16_to_4i32: 680; SSE2: # %bb.0: # %entry 681; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 682; SSE2-NEXT: psrad $16, %xmm0 683; SSE2-NEXT: retq 684; 685; SSSE3-LABEL: sext_8i16_to_4i32: 686; SSSE3: # %bb.0: # %entry 687; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 688; SSSE3-NEXT: psrad $16, %xmm0 689; SSSE3-NEXT: retq 690; 691; SSE41-LABEL: sext_8i16_to_4i32: 692; SSE41: # %bb.0: # %entry 693; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 694; SSE41-NEXT: retq 695; 696; AVX-LABEL: sext_8i16_to_4i32: 697; AVX: # %bb.0: # %entry 698; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 699; AVX-NEXT: retq 700; 701; X86-SSE2-LABEL: sext_8i16_to_4i32: 702; X86-SSE2: # %bb.0: # %entry 703; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 704; X86-SSE2-NEXT: psrad $16, %xmm0 705; X86-SSE2-NEXT: retl 706; 707; X86-SSE41-LABEL: sext_8i16_to_4i32: 708; X86-SSE41: # %bb.0: # %entry 709; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm0 710; X86-SSE41-NEXT: retl 711entry: 712 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 713 %C = sext <4 x i16> %B to <4 x i32> 714 ret <4 x i32> %C 715} 716 717define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 718; SSE2-LABEL: sext_8i16_to_8i32: 719; SSE2: # %bb.0: # %entry 720; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 721; SSE2-NEXT: psrad $16, %xmm2 722; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 723; SSE2-NEXT: psrad $16, %xmm1 724; SSE2-NEXT: movdqa %xmm2, %xmm0 725; SSE2-NEXT: retq 726; 727; SSSE3-LABEL: sext_8i16_to_8i32: 728; SSSE3: # %bb.0: # %entry 729; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 730; SSSE3-NEXT: psrad $16, %xmm2 731; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 732; SSSE3-NEXT: psrad $16, %xmm1 733; SSSE3-NEXT: movdqa %xmm2, %xmm0 734; SSSE3-NEXT: retq 735; 736; SSE41-LABEL: sext_8i16_to_8i32: 737; SSE41: # %bb.0: # %entry 738; SSE41-NEXT: pmovsxwd %xmm0, %xmm2 739; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 740; SSE41-NEXT: pmovsxwd %xmm0, %xmm1 741; SSE41-NEXT: movdqa %xmm2, %xmm0 742; SSE41-NEXT: retq 743; 744; AVX1-LABEL: sext_8i16_to_8i32: 745; AVX1: # %bb.0: # %entry 746; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 747; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 748; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 749; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 750; AVX1-NEXT: retq 751; 752; AVX2-LABEL: sext_8i16_to_8i32: 753; AVX2: # %bb.0: # %entry 754; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 755; AVX2-NEXT: retq 756; 757; AVX512-LABEL: sext_8i16_to_8i32: 758; AVX512: # %bb.0: # %entry 759; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 760; AVX512-NEXT: retq 761; 762; X86-SSE2-LABEL: sext_8i16_to_8i32: 763; X86-SSE2: # %bb.0: # %entry 764; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 765; X86-SSE2-NEXT: psrad $16, %xmm2 766; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 767; X86-SSE2-NEXT: psrad $16, %xmm1 768; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 769; X86-SSE2-NEXT: retl 770; 771; X86-SSE41-LABEL: sext_8i16_to_8i32: 772; 
X86-SSE41: # %bb.0: # %entry 773; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm2 774; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 775; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm1 776; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 777; X86-SSE41-NEXT: retl 778entry: 779 %B = sext <8 x i16> %A to <8 x i32> 780 ret <8 x i32> %B 781} 782 783define <16 x i32> @sext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp { 784; SSE2-LABEL: sext_16i16_to_16i32: 785; SSE2: # %bb.0: # %entry 786; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 787; SSE2-NEXT: psrad $16, %xmm4 788; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] 789; SSE2-NEXT: psrad $16, %xmm5 790; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 791; SSE2-NEXT: psrad $16, %xmm2 792; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 793; SSE2-NEXT: psrad $16, %xmm3 794; SSE2-NEXT: movdqa %xmm4, %xmm0 795; SSE2-NEXT: movdqa %xmm5, %xmm1 796; SSE2-NEXT: retq 797; 798; SSSE3-LABEL: sext_16i16_to_16i32: 799; SSSE3: # %bb.0: # %entry 800; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 801; SSSE3-NEXT: psrad $16, %xmm4 802; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] 803; SSSE3-NEXT: psrad $16, %xmm5 804; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 805; SSSE3-NEXT: psrad $16, %xmm2 806; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 807; SSSE3-NEXT: psrad $16, %xmm3 808; SSSE3-NEXT: movdqa %xmm4, %xmm0 809; SSSE3-NEXT: movdqa %xmm5, %xmm1 810; SSSE3-NEXT: retq 811; 812; SSE41-LABEL: sext_16i16_to_16i32: 813; SSE41: # %bb.0: # %entry 814; SSE41-NEXT: pmovsxwd %xmm0, %xmm5 815; SSE41-NEXT: pmovsxwd 
%xmm1, %xmm2 816; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 817; SSE41-NEXT: pmovsxwd %xmm0, %xmm4 818; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 819; SSE41-NEXT: pmovsxwd %xmm0, %xmm3 820; SSE41-NEXT: movdqa %xmm5, %xmm0 821; SSE41-NEXT: movdqa %xmm4, %xmm1 822; SSE41-NEXT: retq 823; 824; AVX1-LABEL: sext_16i16_to_16i32: 825; AVX1: # %bb.0: # %entry 826; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 827; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 828; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2 829; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 830; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 831; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 832; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 833; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 834; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 835; AVX1-NEXT: vmovaps %ymm2, %ymm0 836; AVX1-NEXT: retq 837; 838; AVX2-LABEL: sext_16i16_to_16i32: 839; AVX2: # %bb.0: # %entry 840; AVX2-NEXT: vpmovsxwd %xmm0, %ymm2 841; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 842; AVX2-NEXT: vpmovsxwd %xmm0, %ymm1 843; AVX2-NEXT: vmovdqa %ymm2, %ymm0 844; AVX2-NEXT: retq 845; 846; AVX512-LABEL: sext_16i16_to_16i32: 847; AVX512: # %bb.0: # %entry 848; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 849; AVX512-NEXT: retq 850; 851; X86-SSE2-LABEL: sext_16i16_to_16i32: 852; X86-SSE2: # %bb.0: # %entry 853; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 854; X86-SSE2-NEXT: psrad $16, %xmm4 855; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] 856; X86-SSE2-NEXT: psrad $16, %xmm5 857; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 858; X86-SSE2-NEXT: psrad $16, %xmm2 859; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 860; X86-SSE2-NEXT: psrad $16, %xmm3 861; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 862; X86-SSE2-NEXT: movdqa %xmm5, %xmm1 863; 
X86-SSE2-NEXT: retl 864; 865; X86-SSE41-LABEL: sext_16i16_to_16i32: 866; X86-SSE41: # %bb.0: # %entry 867; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm5 868; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm2 869; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 870; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm4 871; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 872; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm3 873; X86-SSE41-NEXT: movdqa %xmm5, %xmm0 874; X86-SSE41-NEXT: movdqa %xmm4, %xmm1 875; X86-SSE41-NEXT: retl 876entry: 877 %B = sext <16 x i16> %A to <16 x i32> 878 ret <16 x i32> %B 879} 880 881define <2 x i64> @sext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { 882; SSE2-LABEL: sext_8i16_to_2i64: 883; SSE2: # %bb.0: # %entry 884; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 885; SSE2-NEXT: pxor %xmm1, %xmm1 886; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 887; SSE2-NEXT: psrad $16, %xmm0 888; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 889; SSE2-NEXT: retq 890; 891; SSSE3-LABEL: sext_8i16_to_2i64: 892; SSSE3: # %bb.0: # %entry 893; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 894; SSSE3-NEXT: pxor %xmm1, %xmm1 895; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 896; SSSE3-NEXT: psrad $16, %xmm0 897; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 898; SSSE3-NEXT: retq 899; 900; SSE41-LABEL: sext_8i16_to_2i64: 901; SSE41: # %bb.0: # %entry 902; SSE41-NEXT: pmovsxwq %xmm0, %xmm0 903; SSE41-NEXT: retq 904; 905; AVX-LABEL: sext_8i16_to_2i64: 906; AVX: # %bb.0: # %entry 907; AVX-NEXT: vpmovsxwq %xmm0, %xmm0 908; AVX-NEXT: retq 909; 910; X86-SSE2-LABEL: sext_8i16_to_2i64: 911; X86-SSE2: # %bb.0: # %entry 912; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 913; X86-SSE2-NEXT: pxor %xmm1, %xmm1 914; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 915; X86-SSE2-NEXT: psrad $16, %xmm0 916; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 917; X86-SSE2-NEXT: retl 918; 919; X86-SSE41-LABEL: 
sext_8i16_to_2i64: 920; X86-SSE41: # %bb.0: # %entry 921; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm0 922; X86-SSE41-NEXT: retl 923entry: 924 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 925 %C = sext <2 x i16> %B to <2 x i64> 926 ret <2 x i64> %C 927} 928 929define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { 930; SSE2-LABEL: sext_8i16_to_4i64: 931; SSE2: # %bb.0: # %entry 932; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 933; SSE2-NEXT: psrad $16, %xmm1 934; SSE2-NEXT: pxor %xmm2, %xmm2 935; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 936; SSE2-NEXT: movdqa %xmm1, %xmm0 937; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 938; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 939; SSE2-NEXT: retq 940; 941; SSSE3-LABEL: sext_8i16_to_4i64: 942; SSSE3: # %bb.0: # %entry 943; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 944; SSSE3-NEXT: psrad $16, %xmm1 945; SSSE3-NEXT: pxor %xmm2, %xmm2 946; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 947; SSSE3-NEXT: movdqa %xmm1, %xmm0 948; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 949; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 950; SSSE3-NEXT: retq 951; 952; SSE41-LABEL: sext_8i16_to_4i64: 953; SSE41: # %bb.0: # %entry 954; SSE41-NEXT: pmovsxwq %xmm0, %xmm2 955; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 956; SSE41-NEXT: pmovsxwq %xmm0, %xmm1 957; SSE41-NEXT: movdqa %xmm2, %xmm0 958; SSE41-NEXT: retq 959; 960; AVX1-LABEL: sext_8i16_to_4i64: 961; AVX1: # %bb.0: # %entry 962; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 963; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 964; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 965; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 966; AVX1-NEXT: retq 967; 968; AVX2-LABEL: sext_8i16_to_4i64: 969; AVX2: # %bb.0: # %entry 970; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0 971; 
AVX2-NEXT: retq 972; 973; AVX512-LABEL: sext_8i16_to_4i64: 974; AVX512: # %bb.0: # %entry 975; AVX512-NEXT: vpmovsxwq %xmm0, %ymm0 976; AVX512-NEXT: retq 977; 978; X86-SSE2-LABEL: sext_8i16_to_4i64: 979; X86-SSE2: # %bb.0: # %entry 980; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 981; X86-SSE2-NEXT: psrad $16, %xmm1 982; X86-SSE2-NEXT: pxor %xmm2, %xmm2 983; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 984; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 985; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 986; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 987; X86-SSE2-NEXT: retl 988; 989; X86-SSE41-LABEL: sext_8i16_to_4i64: 990; X86-SSE41: # %bb.0: # %entry 991; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm2 992; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 993; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm1 994; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 995; X86-SSE41-NEXT: retl 996entry: 997 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 998 %C = sext <4 x i16> %B to <4 x i64> 999 ret <4 x i64> %C 1000} 1001 1002define <8 x i64> @sext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp { 1003; SSE2-LABEL: sext_8i16_to_8i64: 1004; SSE2: # %bb.0: # %entry 1005; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1006; SSE2-NEXT: psrad $16, %xmm1 1007; SSE2-NEXT: pxor %xmm5, %xmm5 1008; SSE2-NEXT: pxor %xmm2, %xmm2 1009; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1010; SSE2-NEXT: movdqa %xmm1, %xmm4 1011; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 1012; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1013; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 1014; SSE2-NEXT: psrad $16, %xmm3 1015; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 1016; SSE2-NEXT: movdqa %xmm3, %xmm2 1017; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = 
xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1018; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] 1019; SSE2-NEXT: movdqa %xmm4, %xmm0 1020; SSE2-NEXT: retq 1021; 1022; SSSE3-LABEL: sext_8i16_to_8i64: 1023; SSSE3: # %bb.0: # %entry 1024; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1025; SSSE3-NEXT: psrad $16, %xmm1 1026; SSSE3-NEXT: pxor %xmm5, %xmm5 1027; SSSE3-NEXT: pxor %xmm2, %xmm2 1028; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 1029; SSSE3-NEXT: movdqa %xmm1, %xmm4 1030; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 1031; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1032; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 1033; SSSE3-NEXT: psrad $16, %xmm3 1034; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5 1035; SSSE3-NEXT: movdqa %xmm3, %xmm2 1036; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1037; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] 1038; SSSE3-NEXT: movdqa %xmm4, %xmm0 1039; SSSE3-NEXT: retq 1040; 1041; SSE41-LABEL: sext_8i16_to_8i64: 1042; SSE41: # %bb.0: # %entry 1043; SSE41-NEXT: pmovsxwq %xmm0, %xmm4 1044; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1045; SSE41-NEXT: pmovsxwq %xmm1, %xmm1 1046; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 1047; SSE41-NEXT: pmovsxwq %xmm2, %xmm2 1048; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 1049; SSE41-NEXT: pmovsxwq %xmm0, %xmm3 1050; SSE41-NEXT: movdqa %xmm4, %xmm0 1051; SSE41-NEXT: retq 1052; 1053; AVX1-LABEL: sext_8i16_to_8i64: 1054; AVX1: # %bb.0: # %entry 1055; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 1056; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 1057; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2 1058; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 1059; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1060; AVX1-NEXT: vpmovsxwq %xmm1, %xmm1 1061; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 1062; 
AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 1063; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 1064; AVX1-NEXT: vmovaps %ymm2, %ymm0 1065; AVX1-NEXT: retq 1066; 1067; AVX2-LABEL: sext_8i16_to_8i64: 1068; AVX2: # %bb.0: # %entry 1069; AVX2-NEXT: vpmovsxwq %xmm0, %ymm2 1070; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1071; AVX2-NEXT: vpmovsxwq %xmm0, %ymm1 1072; AVX2-NEXT: vmovdqa %ymm2, %ymm0 1073; AVX2-NEXT: retq 1074; 1075; AVX512-LABEL: sext_8i16_to_8i64: 1076; AVX512: # %bb.0: # %entry 1077; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0 1078; AVX512-NEXT: retq 1079; 1080; X86-SSE2-LABEL: sext_8i16_to_8i64: 1081; X86-SSE2: # %bb.0: # %entry 1082; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1083; X86-SSE2-NEXT: psrad $16, %xmm1 1084; X86-SSE2-NEXT: pxor %xmm5, %xmm5 1085; X86-SSE2-NEXT: pxor %xmm2, %xmm2 1086; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1087; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 1088; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 1089; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1090; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 1091; X86-SSE2-NEXT: psrad $16, %xmm3 1092; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 1093; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 1094; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1095; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] 1096; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 1097; X86-SSE2-NEXT: retl 1098; 1099; X86-SSE41-LABEL: sext_8i16_to_8i64: 1100; X86-SSE41: # %bb.0: # %entry 1101; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm4 1102; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1103; X86-SSE41-NEXT: pmovsxwq %xmm1, %xmm1 1104; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 1105; X86-SSE41-NEXT: pmovsxwq %xmm2, %xmm2 1106; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 1107; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm3 
1108; X86-SSE41-NEXT: movdqa %xmm4, %xmm0 1109; X86-SSE41-NEXT: retl 1110entry: 1111 %B = sext <8 x i16> %A to <8 x i64> 1112 ret <8 x i64> %B 1113} 1114 1115define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1116; SSE2-LABEL: sext_4i32_to_2i64: 1117; SSE2: # %bb.0: # %entry 1118; SSE2-NEXT: pxor %xmm1, %xmm1 1119; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1120; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1121; SSE2-NEXT: retq 1122; 1123; SSSE3-LABEL: sext_4i32_to_2i64: 1124; SSSE3: # %bb.0: # %entry 1125; SSSE3-NEXT: pxor %xmm1, %xmm1 1126; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 1127; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1128; SSSE3-NEXT: retq 1129; 1130; SSE41-LABEL: sext_4i32_to_2i64: 1131; SSE41: # %bb.0: # %entry 1132; SSE41-NEXT: pmovsxdq %xmm0, %xmm0 1133; SSE41-NEXT: retq 1134; 1135; AVX-LABEL: sext_4i32_to_2i64: 1136; AVX: # %bb.0: # %entry 1137; AVX-NEXT: vpmovsxdq %xmm0, %xmm0 1138; AVX-NEXT: retq 1139; 1140; X86-SSE2-LABEL: sext_4i32_to_2i64: 1141; X86-SSE2: # %bb.0: # %entry 1142; X86-SSE2-NEXT: pxor %xmm1, %xmm1 1143; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1144; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1145; X86-SSE2-NEXT: retl 1146; 1147; X86-SSE41-LABEL: sext_4i32_to_2i64: 1148; X86-SSE41: # %bb.0: # %entry 1149; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm0 1150; X86-SSE41-NEXT: retl 1151entry: 1152 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 1153 %C = sext <2 x i32> %B to <2 x i64> 1154 ret <2 x i64> %C 1155} 1156 1157define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1158; SSE2-LABEL: sext_4i32_to_4i64: 1159; SSE2: # %bb.0: # %entry 1160; SSE2-NEXT: pxor %xmm2, %xmm2 1161; SSE2-NEXT: pxor %xmm3, %xmm3 1162; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1163; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1164; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1165; SSE2-NEXT: pcmpgtd 
%xmm1, %xmm2 1166; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1167; SSE2-NEXT: retq 1168; 1169; SSSE3-LABEL: sext_4i32_to_4i64: 1170; SSSE3: # %bb.0: # %entry 1171; SSSE3-NEXT: pxor %xmm2, %xmm2 1172; SSSE3-NEXT: pxor %xmm3, %xmm3 1173; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 1174; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1175; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1176; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 1177; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1178; SSSE3-NEXT: retq 1179; 1180; SSE41-LABEL: sext_4i32_to_4i64: 1181; SSE41: # %bb.0: # %entry 1182; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 1183; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1184; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 1185; SSE41-NEXT: movdqa %xmm2, %xmm0 1186; SSE41-NEXT: retq 1187; 1188; AVX1-LABEL: sext_4i32_to_4i64: 1189; AVX1: # %bb.0: # %entry 1190; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1191; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1192; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1193; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1194; AVX1-NEXT: retq 1195; 1196; AVX2-LABEL: sext_4i32_to_4i64: 1197; AVX2: # %bb.0: # %entry 1198; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 1199; AVX2-NEXT: retq 1200; 1201; AVX512-LABEL: sext_4i32_to_4i64: 1202; AVX512: # %bb.0: # %entry 1203; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0 1204; AVX512-NEXT: retq 1205; 1206; X86-SSE2-LABEL: sext_4i32_to_4i64: 1207; X86-SSE2: # %bb.0: # %entry 1208; X86-SSE2-NEXT: pxor %xmm2, %xmm2 1209; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1210; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1211; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1212; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1213; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1214; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1215; X86-SSE2-NEXT: retl 1216; 1217; X86-SSE41-LABEL: sext_4i32_to_4i64: 1218; X86-SSE41: # %bb.0: # %entry 1219; X86-SSE41-NEXT: pmovsxdq 
%xmm0, %xmm2 1220; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1221; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 1222; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 1223; X86-SSE41-NEXT: retl 1224entry: 1225 %B = sext <4 x i32> %A to <4 x i64> 1226 ret <4 x i64> %B 1227} 1228 1229define <8 x i64> @sext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp { 1230; SSE2-LABEL: sext_8i32_to_8i64: 1231; SSE2: # %bb.0: # %entry 1232; SSE2-NEXT: movdqa %xmm1, %xmm2 1233; SSE2-NEXT: pxor %xmm4, %xmm4 1234; SSE2-NEXT: pxor %xmm3, %xmm3 1235; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1236; SSE2-NEXT: pxor %xmm5, %xmm5 1237; SSE2-NEXT: pcmpgtd %xmm1, %xmm5 1238; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1239; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1240; SSE2-NEXT: pxor %xmm3, %xmm3 1241; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 1242; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1243; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] 1244; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1245; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1246; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1247; SSE2-NEXT: retq 1248; 1249; SSSE3-LABEL: sext_8i32_to_8i64: 1250; SSSE3: # %bb.0: # %entry 1251; SSSE3-NEXT: movdqa %xmm1, %xmm2 1252; SSSE3-NEXT: pxor %xmm4, %xmm4 1253; SSSE3-NEXT: pxor %xmm3, %xmm3 1254; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 1255; SSSE3-NEXT: pxor %xmm5, %xmm5 1256; SSSE3-NEXT: pcmpgtd %xmm1, %xmm5 1257; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1258; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1259; SSSE3-NEXT: pxor %xmm3, %xmm3 1260; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 1261; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1262; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] 1263; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1264; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 1265; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = 
xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1266; SSSE3-NEXT: retq 1267; 1268; SSE41-LABEL: sext_8i32_to_8i64: 1269; SSE41: # %bb.0: # %entry 1270; SSE41-NEXT: pmovsxdq %xmm0, %xmm5 1271; SSE41-NEXT: pmovsxdq %xmm1, %xmm2 1272; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1273; SSE41-NEXT: pmovsxdq %xmm0, %xmm4 1274; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1275; SSE41-NEXT: pmovsxdq %xmm0, %xmm3 1276; SSE41-NEXT: movdqa %xmm5, %xmm0 1277; SSE41-NEXT: movdqa %xmm4, %xmm1 1278; SSE41-NEXT: retq 1279; 1280; AVX1-LABEL: sext_8i32_to_8i64: 1281; AVX1: # %bb.0: # %entry 1282; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1283; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 1284; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2 1285; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 1286; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1287; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1288; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1289; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1290; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 1291; AVX1-NEXT: vmovaps %ymm2, %ymm0 1292; AVX1-NEXT: retq 1293; 1294; AVX2-LABEL: sext_8i32_to_8i64: 1295; AVX2: # %bb.0: # %entry 1296; AVX2-NEXT: vpmovsxdq %xmm0, %ymm2 1297; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1298; AVX2-NEXT: vpmovsxdq %xmm0, %ymm1 1299; AVX2-NEXT: vmovdqa %ymm2, %ymm0 1300; AVX2-NEXT: retq 1301; 1302; AVX512-LABEL: sext_8i32_to_8i64: 1303; AVX512: # %bb.0: # %entry 1304; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0 1305; AVX512-NEXT: retq 1306; 1307; X86-SSE2-LABEL: sext_8i32_to_8i64: 1308; X86-SSE2: # %bb.0: # %entry 1309; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1310; X86-SSE2-NEXT: pxor %xmm4, %xmm4 1311; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1312; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1313; X86-SSE2-NEXT: pxor %xmm5, %xmm5 1314; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm5 1315; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1316; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1317; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1318; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 
1319; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1320; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] 1321; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1322; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1323; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1324; X86-SSE2-NEXT: retl 1325; 1326; X86-SSE41-LABEL: sext_8i32_to_8i64: 1327; X86-SSE41: # %bb.0: # %entry 1328; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm5 1329; X86-SSE41-NEXT: pmovsxdq %xmm1, %xmm2 1330; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1331; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm4 1332; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1333; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm3 1334; X86-SSE41-NEXT: movdqa %xmm5, %xmm0 1335; X86-SSE41-NEXT: movdqa %xmm4, %xmm1 1336; X86-SSE41-NEXT: retl 1337entry: 1338 %B = sext <8 x i32> %A to <8 x i64> 1339 ret <8 x i64> %B 1340} 1341 1342define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { 1343; SSE-LABEL: load_sext_2i1_to_2i64: 1344; SSE: # %bb.0: # %entry 1345; SSE-NEXT: movb (%rdi), %al 1346; SSE-NEXT: movzbl %al, %ecx 1347; SSE-NEXT: shrb %al 1348; SSE-NEXT: movzbl %al, %eax 1349; SSE-NEXT: negq %rax 1350; SSE-NEXT: movq %rax, %xmm1 1351; SSE-NEXT: andl $1, %ecx 1352; SSE-NEXT: negq %rcx 1353; SSE-NEXT: movq %rcx, %xmm0 1354; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1355; SSE-NEXT: retq 1356; 1357; AVX1-LABEL: load_sext_2i1_to_2i64: 1358; AVX1: # %bb.0: # %entry 1359; AVX1-NEXT: movb (%rdi), %al 1360; AVX1-NEXT: movzbl %al, %ecx 1361; AVX1-NEXT: shrb %al 1362; AVX1-NEXT: movzbl %al, %eax 1363; AVX1-NEXT: negq %rax 1364; AVX1-NEXT: vmovq %rax, %xmm0 1365; AVX1-NEXT: andl $1, %ecx 1366; AVX1-NEXT: negq %rcx 1367; AVX1-NEXT: vmovq %rcx, %xmm1 1368; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1369; AVX1-NEXT: retq 1370; 1371; AVX2-LABEL: load_sext_2i1_to_2i64: 1372; AVX2: # %bb.0: # %entry 1373; AVX2-NEXT: movb (%rdi), %al 1374; AVX2-NEXT: movzbl 
%al, %ecx 1375; AVX2-NEXT: shrb %al 1376; AVX2-NEXT: movzbl %al, %eax 1377; AVX2-NEXT: negq %rax 1378; AVX2-NEXT: vmovq %rax, %xmm0 1379; AVX2-NEXT: andl $1, %ecx 1380; AVX2-NEXT: negq %rcx 1381; AVX2-NEXT: vmovq %rcx, %xmm1 1382; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1383; AVX2-NEXT: retq 1384; 1385; AVX512-LABEL: load_sext_2i1_to_2i64: 1386; AVX512: # %bb.0: # %entry 1387; AVX512-NEXT: kmovw (%rdi), %k1 1388; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1389; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1390; AVX512-NEXT: vzeroupper 1391; AVX512-NEXT: retq 1392; 1393; X86-SSE2-LABEL: load_sext_2i1_to_2i64: 1394; X86-SSE2: # %bb.0: # %entry 1395; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1396; X86-SSE2-NEXT: movb (%eax), %al 1397; X86-SSE2-NEXT: movzbl %al, %ecx 1398; X86-SSE2-NEXT: shrb %al 1399; X86-SSE2-NEXT: movzbl %al, %eax 1400; X86-SSE2-NEXT: negl %eax 1401; X86-SSE2-NEXT: movd %eax, %xmm0 1402; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1] 1403; X86-SSE2-NEXT: andl $1, %ecx 1404; X86-SSE2-NEXT: negl %ecx 1405; X86-SSE2-NEXT: movd %ecx, %xmm0 1406; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 1407; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1408; X86-SSE2-NEXT: retl 1409; 1410; X86-SSE41-LABEL: load_sext_2i1_to_2i64: 1411; X86-SSE41: # %bb.0: # %entry 1412; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1413; X86-SSE41-NEXT: movb (%eax), %al 1414; X86-SSE41-NEXT: movzbl %al, %ecx 1415; X86-SSE41-NEXT: andl $1, %ecx 1416; X86-SSE41-NEXT: negl %ecx 1417; X86-SSE41-NEXT: movd %ecx, %xmm0 1418; X86-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1419; X86-SSE41-NEXT: shrb %al 1420; X86-SSE41-NEXT: movzbl %al, %eax 1421; X86-SSE41-NEXT: negl %eax 1422; X86-SSE41-NEXT: pinsrd $2, %eax, %xmm0 1423; X86-SSE41-NEXT: pinsrd $3, %eax, %xmm0 1424; X86-SSE41-NEXT: retl 1425entry: 1426 %X = load <2 x i1>, <2 x i1>* %ptr 1427 %Y = sext <2 x i1> %X to <2 x i64> 1428 ret <2 x i64> %Y 1429} 1430 1431define <2 x i64> 
@load_sext_2i8_to_2i64(<2 x i8> *%ptr) { 1432; SSE2-LABEL: load_sext_2i8_to_2i64: 1433; SSE2: # %bb.0: # %entry 1434; SSE2-NEXT: movzwl (%rdi), %eax 1435; SSE2-NEXT: movd %eax, %xmm0 1436; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1437; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1438; SSE2-NEXT: pxor %xmm1, %xmm1 1439; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1440; SSE2-NEXT: psrad $24, %xmm0 1441; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1442; SSE2-NEXT: retq 1443; 1444; SSSE3-LABEL: load_sext_2i8_to_2i64: 1445; SSSE3: # %bb.0: # %entry 1446; SSSE3-NEXT: movzwl (%rdi), %eax 1447; SSSE3-NEXT: movd %eax, %xmm0 1448; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1449; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1450; SSSE3-NEXT: pxor %xmm1, %xmm1 1451; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 1452; SSSE3-NEXT: psrad $24, %xmm0 1453; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1454; SSSE3-NEXT: retq 1455; 1456; SSE41-LABEL: load_sext_2i8_to_2i64: 1457; SSE41: # %bb.0: # %entry 1458; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1459; SSE41-NEXT: retq 1460; 1461; AVX-LABEL: load_sext_2i8_to_2i64: 1462; AVX: # %bb.0: # %entry 1463; AVX-NEXT: vpmovsxbq (%rdi), %xmm0 1464; AVX-NEXT: retq 1465; 1466; X86-SSE2-LABEL: load_sext_2i8_to_2i64: 1467; X86-SSE2: # %bb.0: # %entry 1468; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1469; X86-SSE2-NEXT: movzwl (%eax), %eax 1470; X86-SSE2-NEXT: movd %eax, %xmm0 1471; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1472; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1473; X86-SSE2-NEXT: pxor %xmm1, %xmm1 1474; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1475; X86-SSE2-NEXT: psrad $24, %xmm0 1476; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1477; X86-SSE2-NEXT: retl 1478; 1479; X86-SSE41-LABEL: load_sext_2i8_to_2i64: 1480; X86-SSE41: # %bb.0: # %entry 
1481; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1482; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0 1483; X86-SSE41-NEXT: retl 1484entry: 1485 %X = load <2 x i8>, <2 x i8>* %ptr 1486 %Y = sext <2 x i8> %X to <2 x i64> 1487 ret <2 x i64> %Y 1488} 1489 1490define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { 1491; SSE2-LABEL: load_sext_4i1_to_4i32: 1492; SSE2: # %bb.0: # %entry 1493; SSE2-NEXT: movb (%rdi), %al 1494; SSE2-NEXT: movl %eax, %ecx 1495; SSE2-NEXT: shrb $3, %cl 1496; SSE2-NEXT: movzbl %cl, %ecx 1497; SSE2-NEXT: negl %ecx 1498; SSE2-NEXT: movd %ecx, %xmm0 1499; SSE2-NEXT: movzbl %al, %ecx 1500; SSE2-NEXT: shrb $2, %al 1501; SSE2-NEXT: movzbl %al, %eax 1502; SSE2-NEXT: andl $1, %eax 1503; SSE2-NEXT: negl %eax 1504; SSE2-NEXT: movd %eax, %xmm1 1505; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1506; SSE2-NEXT: movl %ecx, %eax 1507; SSE2-NEXT: andl $1, %eax 1508; SSE2-NEXT: negl %eax 1509; SSE2-NEXT: movd %eax, %xmm0 1510; SSE2-NEXT: shrb %cl 1511; SSE2-NEXT: movzbl %cl, %eax 1512; SSE2-NEXT: andl $1, %eax 1513; SSE2-NEXT: negl %eax 1514; SSE2-NEXT: movd %eax, %xmm2 1515; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1516; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1517; SSE2-NEXT: retq 1518; 1519; SSSE3-LABEL: load_sext_4i1_to_4i32: 1520; SSSE3: # %bb.0: # %entry 1521; SSSE3-NEXT: movb (%rdi), %al 1522; SSSE3-NEXT: movl %eax, %ecx 1523; SSSE3-NEXT: shrb $3, %cl 1524; SSSE3-NEXT: movzbl %cl, %ecx 1525; SSSE3-NEXT: negl %ecx 1526; SSSE3-NEXT: movd %ecx, %xmm0 1527; SSSE3-NEXT: movzbl %al, %ecx 1528; SSSE3-NEXT: shrb $2, %al 1529; SSSE3-NEXT: movzbl %al, %eax 1530; SSSE3-NEXT: andl $1, %eax 1531; SSSE3-NEXT: negl %eax 1532; SSSE3-NEXT: movd %eax, %xmm1 1533; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1534; SSSE3-NEXT: movl %ecx, %eax 1535; SSSE3-NEXT: andl $1, %eax 1536; SSSE3-NEXT: negl %eax 1537; SSSE3-NEXT: movd %eax, %xmm0 1538; SSSE3-NEXT: shrb %cl 1539; SSSE3-NEXT: movzbl 
%cl, %eax 1540; SSSE3-NEXT: andl $1, %eax 1541; SSSE3-NEXT: negl %eax 1542; SSSE3-NEXT: movd %eax, %xmm2 1543; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1544; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1545; SSSE3-NEXT: retq 1546; 1547; SSE41-LABEL: load_sext_4i1_to_4i32: 1548; SSE41: # %bb.0: # %entry 1549; SSE41-NEXT: movb (%rdi), %al 1550; SSE41-NEXT: movzbl %al, %ecx 1551; SSE41-NEXT: shrb %al 1552; SSE41-NEXT: movzbl %al, %eax 1553; SSE41-NEXT: andl $1, %eax 1554; SSE41-NEXT: negl %eax 1555; SSE41-NEXT: movl %ecx, %edx 1556; SSE41-NEXT: andl $1, %edx 1557; SSE41-NEXT: negl %edx 1558; SSE41-NEXT: movd %edx, %xmm0 1559; SSE41-NEXT: pinsrd $1, %eax, %xmm0 1560; SSE41-NEXT: movl %ecx, %eax 1561; SSE41-NEXT: shrb $2, %al 1562; SSE41-NEXT: movzbl %al, %eax 1563; SSE41-NEXT: andl $1, %eax 1564; SSE41-NEXT: negl %eax 1565; SSE41-NEXT: pinsrd $2, %eax, %xmm0 1566; SSE41-NEXT: shrb $3, %cl 1567; SSE41-NEXT: movzbl %cl, %eax 1568; SSE41-NEXT: negl %eax 1569; SSE41-NEXT: pinsrd $3, %eax, %xmm0 1570; SSE41-NEXT: retq 1571; 1572; AVX1-LABEL: load_sext_4i1_to_4i32: 1573; AVX1: # %bb.0: # %entry 1574; AVX1-NEXT: movb (%rdi), %al 1575; AVX1-NEXT: movzbl %al, %ecx 1576; AVX1-NEXT: shrb %al 1577; AVX1-NEXT: movzbl %al, %eax 1578; AVX1-NEXT: andl $1, %eax 1579; AVX1-NEXT: negl %eax 1580; AVX1-NEXT: movl %ecx, %edx 1581; AVX1-NEXT: andl $1, %edx 1582; AVX1-NEXT: negl %edx 1583; AVX1-NEXT: vmovd %edx, %xmm0 1584; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 1585; AVX1-NEXT: movl %ecx, %eax 1586; AVX1-NEXT: shrb $2, %al 1587; AVX1-NEXT: movzbl %al, %eax 1588; AVX1-NEXT: andl $1, %eax 1589; AVX1-NEXT: negl %eax 1590; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 1591; AVX1-NEXT: shrb $3, %cl 1592; AVX1-NEXT: movzbl %cl, %eax 1593; AVX1-NEXT: negl %eax 1594; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1595; AVX1-NEXT: retq 1596; 1597; AVX2-LABEL: load_sext_4i1_to_4i32: 1598; AVX2: # %bb.0: # %entry 1599; AVX2-NEXT: movb (%rdi), %al 1600; AVX2-NEXT: 
movzbl %al, %ecx 1601; AVX2-NEXT: shrb %al 1602; AVX2-NEXT: movzbl %al, %eax 1603; AVX2-NEXT: andl $1, %eax 1604; AVX2-NEXT: negl %eax 1605; AVX2-NEXT: movl %ecx, %edx 1606; AVX2-NEXT: andl $1, %edx 1607; AVX2-NEXT: negl %edx 1608; AVX2-NEXT: vmovd %edx, %xmm0 1609; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 1610; AVX2-NEXT: movl %ecx, %eax 1611; AVX2-NEXT: shrb $2, %al 1612; AVX2-NEXT: movzbl %al, %eax 1613; AVX2-NEXT: andl $1, %eax 1614; AVX2-NEXT: negl %eax 1615; AVX2-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 1616; AVX2-NEXT: shrb $3, %cl 1617; AVX2-NEXT: movzbl %cl, %eax 1618; AVX2-NEXT: negl %eax 1619; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1620; AVX2-NEXT: retq 1621; 1622; AVX512-LABEL: load_sext_4i1_to_4i32: 1623; AVX512: # %bb.0: # %entry 1624; AVX512-NEXT: kmovw (%rdi), %k1 1625; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1626; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1627; AVX512-NEXT: vzeroupper 1628; AVX512-NEXT: retq 1629; 1630; X86-SSE2-LABEL: load_sext_4i1_to_4i32: 1631; X86-SSE2: # %bb.0: # %entry 1632; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1633; X86-SSE2-NEXT: movb (%eax), %al 1634; X86-SSE2-NEXT: movl %eax, %ecx 1635; X86-SSE2-NEXT: shrb $3, %cl 1636; X86-SSE2-NEXT: movzbl %cl, %ecx 1637; X86-SSE2-NEXT: negl %ecx 1638; X86-SSE2-NEXT: movd %ecx, %xmm0 1639; X86-SSE2-NEXT: movl %eax, %ecx 1640; X86-SSE2-NEXT: shrb $2, %cl 1641; X86-SSE2-NEXT: movzbl %cl, %ecx 1642; X86-SSE2-NEXT: andl $1, %ecx 1643; X86-SSE2-NEXT: negl %ecx 1644; X86-SSE2-NEXT: movd %ecx, %xmm1 1645; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1646; X86-SSE2-NEXT: movzbl %al, %ecx 1647; X86-SSE2-NEXT: andl $1, %ecx 1648; X86-SSE2-NEXT: negl %ecx 1649; X86-SSE2-NEXT: movd %ecx, %xmm0 1650; X86-SSE2-NEXT: shrb %al 1651; X86-SSE2-NEXT: movzbl %al, %eax 1652; X86-SSE2-NEXT: andl $1, %eax 1653; X86-SSE2-NEXT: negl %eax 1654; X86-SSE2-NEXT: movd %eax, %xmm2 1655; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1656; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1657; X86-SSE2-NEXT: retl 1658; 1659; X86-SSE41-LABEL: load_sext_4i1_to_4i32: 1660; X86-SSE41: # %bb.0: # %entry 1661; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1662; X86-SSE41-NEXT: movb (%eax), %al 1663; X86-SSE41-NEXT: movl %eax, %ecx 1664; X86-SSE41-NEXT: shrb %cl 1665; X86-SSE41-NEXT: movzbl %cl, %ecx 1666; X86-SSE41-NEXT: andl $1, %ecx 1667; X86-SSE41-NEXT: negl %ecx 1668; X86-SSE41-NEXT: movzbl %al, %edx 1669; X86-SSE41-NEXT: andl $1, %edx 1670; X86-SSE41-NEXT: negl %edx 1671; X86-SSE41-NEXT: movd %edx, %xmm0 1672; X86-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1673; X86-SSE41-NEXT: movl %eax, %ecx 1674; X86-SSE41-NEXT: shrb $2, %cl 1675; X86-SSE41-NEXT: movzbl %cl, %ecx 1676; X86-SSE41-NEXT: andl $1, %ecx 1677; X86-SSE41-NEXT: negl %ecx 1678; X86-SSE41-NEXT: pinsrd $2, %ecx, %xmm0 1679; X86-SSE41-NEXT: shrb $3, %al 1680; X86-SSE41-NEXT: movzbl %al, %eax 1681; X86-SSE41-NEXT: negl %eax 1682; X86-SSE41-NEXT: pinsrd $3, %eax, %xmm0 1683; X86-SSE41-NEXT: retl 1684entry: 1685 %X = load <4 x i1>, <4 x i1>* %ptr 1686 %Y = sext <4 x i1> %X to <4 x i32> 1687 ret <4 x i32> %Y 1688} 1689 1690define <4 x i32> @load_sext_4i8_to_4i32(<4 x i8> *%ptr) { 1691; SSE2-LABEL: load_sext_4i8_to_4i32: 1692; SSE2: # %bb.0: # %entry 1693; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1694; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1695; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1696; SSE2-NEXT: psrad $24, %xmm0 1697; SSE2-NEXT: retq 1698; 1699; SSSE3-LABEL: load_sext_4i8_to_4i32: 1700; SSSE3: # %bb.0: # %entry 1701; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1702; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1703; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1704; SSSE3-NEXT: psrad $24, %xmm0 1705; SSSE3-NEXT: retq 1706; 1707; SSE41-LABEL: load_sext_4i8_to_4i32: 1708; SSE41: # 
%bb.0: # %entry 1709; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 1710; SSE41-NEXT: retq 1711; 1712; AVX-LABEL: load_sext_4i8_to_4i32: 1713; AVX: # %bb.0: # %entry 1714; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 1715; AVX-NEXT: retq 1716; 1717; X86-SSE2-LABEL: load_sext_4i8_to_4i32: 1718; X86-SSE2: # %bb.0: # %entry 1719; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1720; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1721; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1722; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1723; X86-SSE2-NEXT: psrad $24, %xmm0 1724; X86-SSE2-NEXT: retl 1725; 1726; X86-SSE41-LABEL: load_sext_4i8_to_4i32: 1727; X86-SSE41: # %bb.0: # %entry 1728; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1729; X86-SSE41-NEXT: pmovsxbd (%eax), %xmm0 1730; X86-SSE41-NEXT: retl 1731entry: 1732 %X = load <4 x i8>, <4 x i8>* %ptr 1733 %Y = sext <4 x i8> %X to <4 x i32> 1734 ret <4 x i32> %Y 1735} 1736 1737define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { 1738; SSE2-LABEL: load_sext_4i1_to_4i64: 1739; SSE2: # %bb.0: # %entry 1740; SSE2-NEXT: movb (%rdi), %al 1741; SSE2-NEXT: movl %eax, %ecx 1742; SSE2-NEXT: shrb %cl 1743; SSE2-NEXT: andb $1, %cl 1744; SSE2-NEXT: movzbl %cl, %ecx 1745; SSE2-NEXT: movl %eax, %edx 1746; SSE2-NEXT: andb $1, %dl 1747; SSE2-NEXT: movzbl %dl, %edx 1748; SSE2-NEXT: movd %edx, %xmm1 1749; SSE2-NEXT: pinsrw $2, %ecx, %xmm1 1750; SSE2-NEXT: movl %eax, %ecx 1751; SSE2-NEXT: shrb $2, %cl 1752; SSE2-NEXT: andb $1, %cl 1753; SSE2-NEXT: movzbl %cl, %ecx 1754; SSE2-NEXT: pinsrw $4, %ecx, %xmm1 1755; SSE2-NEXT: shrb $3, %al 1756; SSE2-NEXT: movzbl %al, %eax 1757; SSE2-NEXT: pinsrw $6, %eax, %xmm1 1758; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] 1759; SSE2-NEXT: psllq $63, %xmm0 1760; SSE2-NEXT: psrad $31, %xmm0 1761; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1762; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] 1763; SSE2-NEXT: psllq $63, %xmm1 1764; SSE2-NEXT: psrad $31, %xmm1 
1765; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1766; SSE2-NEXT: retq 1767; 1768; SSSE3-LABEL: load_sext_4i1_to_4i64: 1769; SSSE3: # %bb.0: # %entry 1770; SSSE3-NEXT: movb (%rdi), %al 1771; SSSE3-NEXT: movl %eax, %ecx 1772; SSSE3-NEXT: shrb %cl 1773; SSSE3-NEXT: andb $1, %cl 1774; SSSE3-NEXT: movzbl %cl, %ecx 1775; SSSE3-NEXT: movl %eax, %edx 1776; SSSE3-NEXT: andb $1, %dl 1777; SSSE3-NEXT: movzbl %dl, %edx 1778; SSSE3-NEXT: movd %edx, %xmm1 1779; SSSE3-NEXT: pinsrw $2, %ecx, %xmm1 1780; SSSE3-NEXT: movl %eax, %ecx 1781; SSSE3-NEXT: shrb $2, %cl 1782; SSSE3-NEXT: andb $1, %cl 1783; SSSE3-NEXT: movzbl %cl, %ecx 1784; SSSE3-NEXT: pinsrw $4, %ecx, %xmm1 1785; SSSE3-NEXT: shrb $3, %al 1786; SSSE3-NEXT: movzbl %al, %eax 1787; SSSE3-NEXT: pinsrw $6, %eax, %xmm1 1788; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] 1789; SSSE3-NEXT: psllq $63, %xmm0 1790; SSSE3-NEXT: psrad $31, %xmm0 1791; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1792; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] 1793; SSSE3-NEXT: psllq $63, %xmm1 1794; SSSE3-NEXT: psrad $31, %xmm1 1795; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1796; SSSE3-NEXT: retq 1797; 1798; SSE41-LABEL: load_sext_4i1_to_4i64: 1799; SSE41: # %bb.0: # %entry 1800; SSE41-NEXT: movb (%rdi), %al 1801; SSE41-NEXT: movl %eax, %ecx 1802; SSE41-NEXT: shrb %cl 1803; SSE41-NEXT: andb $1, %cl 1804; SSE41-NEXT: movzbl %cl, %ecx 1805; SSE41-NEXT: movl %eax, %edx 1806; SSE41-NEXT: andb $1, %dl 1807; SSE41-NEXT: movzbl %dl, %edx 1808; SSE41-NEXT: movd %edx, %xmm1 1809; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 1810; SSE41-NEXT: movl %eax, %ecx 1811; SSE41-NEXT: shrb $2, %cl 1812; SSE41-NEXT: andb $1, %cl 1813; SSE41-NEXT: movzbl %cl, %ecx 1814; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1815; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 1816; SSE41-NEXT: shrb $3, %al 1817; SSE41-NEXT: movzbl %al, %eax 1818; SSE41-NEXT: pinsrb $12, %eax, %xmm1 1819; SSE41-NEXT: psllq $63, %xmm0 1820; SSE41-NEXT: psrad $31, %xmm0 1821; 
SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1822; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1823; SSE41-NEXT: psllq $63, %xmm1 1824; SSE41-NEXT: psrad $31, %xmm1 1825; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1826; SSE41-NEXT: retq 1827; 1828; AVX1-LABEL: load_sext_4i1_to_4i64: 1829; AVX1: # %bb.0: # %entry 1830; AVX1-NEXT: movb (%rdi), %al 1831; AVX1-NEXT: movzbl %al, %ecx 1832; AVX1-NEXT: shrb %al 1833; AVX1-NEXT: movzbl %al, %eax 1834; AVX1-NEXT: andl $1, %eax 1835; AVX1-NEXT: negl %eax 1836; AVX1-NEXT: movl %ecx, %edx 1837; AVX1-NEXT: andl $1, %edx 1838; AVX1-NEXT: negl %edx 1839; AVX1-NEXT: vmovd %edx, %xmm0 1840; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 1841; AVX1-NEXT: movl %ecx, %eax 1842; AVX1-NEXT: shrb $2, %al 1843; AVX1-NEXT: movzbl %al, %eax 1844; AVX1-NEXT: andl $1, %eax 1845; AVX1-NEXT: negl %eax 1846; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 1847; AVX1-NEXT: shrb $3, %cl 1848; AVX1-NEXT: movzbl %cl, %eax 1849; AVX1-NEXT: negl %eax 1850; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1851; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1852; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1853; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1854; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1855; AVX1-NEXT: retq 1856; 1857; AVX2-LABEL: load_sext_4i1_to_4i64: 1858; AVX2: # %bb.0: # %entry 1859; AVX2-NEXT: movb (%rdi), %al 1860; AVX2-NEXT: movl %eax, %ecx 1861; AVX2-NEXT: shrb $3, %cl 1862; AVX2-NEXT: movzbl %cl, %ecx 1863; AVX2-NEXT: negq %rcx 1864; AVX2-NEXT: vmovq %rcx, %xmm0 1865; AVX2-NEXT: movzbl %al, %ecx 1866; AVX2-NEXT: shrb $2, %al 1867; AVX2-NEXT: movzbl %al, %eax 1868; AVX2-NEXT: andl $1, %eax 1869; AVX2-NEXT: negq %rax 1870; AVX2-NEXT: vmovq %rax, %xmm1 1871; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1872; AVX2-NEXT: movl %ecx, %eax 1873; AVX2-NEXT: andl $1, %eax 1874; AVX2-NEXT: negq %rax 1875; AVX2-NEXT: vmovq %rax, %xmm1 1876; AVX2-NEXT: shrb %cl 1877; AVX2-NEXT: movzbl %cl, %eax 1878; AVX2-NEXT: andl $1, %eax 1879; AVX2-NEXT: 
negq %rax 1880; AVX2-NEXT: vmovq %rax, %xmm2 1881; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1882; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1883; AVX2-NEXT: retq 1884; 1885; AVX512-LABEL: load_sext_4i1_to_4i64: 1886; AVX512: # %bb.0: # %entry 1887; AVX512-NEXT: kmovw (%rdi), %k1 1888; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1889; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1890; AVX512-NEXT: retq 1891; 1892; X86-SSE2-LABEL: load_sext_4i1_to_4i64: 1893; X86-SSE2: # %bb.0: # %entry 1894; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1895; X86-SSE2-NEXT: movb (%eax), %al 1896; X86-SSE2-NEXT: movl %eax, %ecx 1897; X86-SSE2-NEXT: shrb %cl 1898; X86-SSE2-NEXT: andb $1, %cl 1899; X86-SSE2-NEXT: movzbl %cl, %ecx 1900; X86-SSE2-NEXT: movl %eax, %edx 1901; X86-SSE2-NEXT: andb $1, %dl 1902; X86-SSE2-NEXT: movzbl %dl, %edx 1903; X86-SSE2-NEXT: movd %edx, %xmm1 1904; X86-SSE2-NEXT: pinsrw $2, %ecx, %xmm1 1905; X86-SSE2-NEXT: movl %eax, %ecx 1906; X86-SSE2-NEXT: shrb $2, %cl 1907; X86-SSE2-NEXT: andb $1, %cl 1908; X86-SSE2-NEXT: movzbl %cl, %ecx 1909; X86-SSE2-NEXT: pinsrw $4, %ecx, %xmm1 1910; X86-SSE2-NEXT: shrb $3, %al 1911; X86-SSE2-NEXT: movzbl %al, %eax 1912; X86-SSE2-NEXT: pinsrw $6, %eax, %xmm1 1913; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] 1914; X86-SSE2-NEXT: psllq $63, %xmm0 1915; X86-SSE2-NEXT: psrad $31, %xmm0 1916; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1917; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] 1918; X86-SSE2-NEXT: psllq $63, %xmm1 1919; X86-SSE2-NEXT: psrad $31, %xmm1 1920; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1921; X86-SSE2-NEXT: retl 1922; 1923; X86-SSE41-LABEL: load_sext_4i1_to_4i64: 1924; X86-SSE41: # %bb.0: # %entry 1925; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1926; X86-SSE41-NEXT: movb (%eax), %al 1927; X86-SSE41-NEXT: movl %eax, %ecx 1928; X86-SSE41-NEXT: shrb %cl 1929; X86-SSE41-NEXT: andb $1, %cl 1930; X86-SSE41-NEXT: movzbl %cl, %ecx 1931; 
X86-SSE41-NEXT: movl %eax, %edx 1932; X86-SSE41-NEXT: andb $1, %dl 1933; X86-SSE41-NEXT: movzbl %dl, %edx 1934; X86-SSE41-NEXT: movd %edx, %xmm1 1935; X86-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 1936; X86-SSE41-NEXT: movl %eax, %ecx 1937; X86-SSE41-NEXT: shrb $2, %cl 1938; X86-SSE41-NEXT: andb $1, %cl 1939; X86-SSE41-NEXT: movzbl %cl, %ecx 1940; X86-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1941; X86-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 1942; X86-SSE41-NEXT: shrb $3, %al 1943; X86-SSE41-NEXT: movzbl %al, %eax 1944; X86-SSE41-NEXT: pinsrb $12, %eax, %xmm1 1945; X86-SSE41-NEXT: psllq $63, %xmm0 1946; X86-SSE41-NEXT: psrad $31, %xmm0 1947; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1948; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1949; X86-SSE41-NEXT: psllq $63, %xmm1 1950; X86-SSE41-NEXT: psrad $31, %xmm1 1951; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1952; X86-SSE41-NEXT: retl 1953entry: 1954 %X = load <4 x i1>, <4 x i1>* %ptr 1955 %Y = sext <4 x i1> %X to <4 x i64> 1956 ret <4 x i64> %Y 1957} 1958 1959define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) { 1960; SSE2-LABEL: load_sext_4i8_to_4i64: 1961; SSE2: # %bb.0: # %entry 1962; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1963; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1964; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1965; SSE2-NEXT: psrad $24, %xmm1 1966; SSE2-NEXT: pxor %xmm2, %xmm2 1967; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1968; SSE2-NEXT: movdqa %xmm1, %xmm0 1969; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1970; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1971; SSE2-NEXT: retq 1972; 1973; SSSE3-LABEL: load_sext_4i8_to_4i64: 1974; SSSE3: # %bb.0: # %entry 1975; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1976; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1977; SSSE3-NEXT: 
punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1978; SSSE3-NEXT: psrad $24, %xmm1 1979; SSSE3-NEXT: pxor %xmm2, %xmm2 1980; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 1981; SSSE3-NEXT: movdqa %xmm1, %xmm0 1982; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1983; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1984; SSSE3-NEXT: retq 1985; 1986; SSE41-LABEL: load_sext_4i8_to_4i64: 1987; SSE41: # %bb.0: # %entry 1988; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1989; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 1990; SSE41-NEXT: retq 1991; 1992; AVX1-LABEL: load_sext_4i8_to_4i64: 1993; AVX1: # %bb.0: # %entry 1994; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0 1995; AVX1-NEXT: vpmovsxbq (%rdi), %xmm1 1996; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1997; AVX1-NEXT: retq 1998; 1999; AVX2-LABEL: load_sext_4i8_to_4i64: 2000; AVX2: # %bb.0: # %entry 2001; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 2002; AVX2-NEXT: retq 2003; 2004; AVX512-LABEL: load_sext_4i8_to_4i64: 2005; AVX512: # %bb.0: # %entry 2006; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0 2007; AVX512-NEXT: retq 2008; 2009; X86-SSE2-LABEL: load_sext_4i8_to_4i64: 2010; X86-SSE2: # %bb.0: # %entry 2011; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2012; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2013; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2014; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2015; X86-SSE2-NEXT: psrad $24, %xmm1 2016; X86-SSE2-NEXT: pxor %xmm2, %xmm2 2017; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 2018; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2019; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2020; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2021; X86-SSE2-NEXT: retl 2022; 2023; X86-SSE41-LABEL: load_sext_4i8_to_4i64: 2024; X86-SSE41: # %bb.0: # %entry 2025; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 
2026; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0 2027; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 2028; X86-SSE41-NEXT: retl 2029entry: 2030 %X = load <4 x i8>, <4 x i8>* %ptr 2031 %Y = sext <4 x i8> %X to <4 x i64> 2032 ret <4 x i64> %Y 2033} 2034 2035define <2 x i64> @load_sext_4i8_to_4i64_extract(<4 x i8> *%ptr) { 2036; SSE2-LABEL: load_sext_4i8_to_4i64_extract: 2037; SSE2: # %bb.0: 2038; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2039; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2040; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2041; SSE2-NEXT: psrad $24, %xmm0 2042; SSE2-NEXT: pxor %xmm1, %xmm1 2043; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2044; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2045; SSE2-NEXT: retq 2046; 2047; SSSE3-LABEL: load_sext_4i8_to_4i64_extract: 2048; SSSE3: # %bb.0: 2049; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2050; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2051; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2052; SSSE3-NEXT: psrad $24, %xmm0 2053; SSSE3-NEXT: pxor %xmm1, %xmm1 2054; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 2055; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2056; SSSE3-NEXT: retq 2057; 2058; SSE41-LABEL: load_sext_4i8_to_4i64_extract: 2059; SSE41: # %bb.0: 2060; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm0 2061; SSE41-NEXT: retq 2062; 2063; AVX1-LABEL: load_sext_4i8_to_4i64_extract: 2064; AVX1: # %bb.0: 2065; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0 2066; AVX1-NEXT: retq 2067; 2068; AVX2-LABEL: load_sext_4i8_to_4i64_extract: 2069; AVX2: # %bb.0: 2070; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 2071; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 2072; AVX2-NEXT: vzeroupper 2073; AVX2-NEXT: retq 2074; 2075; AVX512-LABEL: load_sext_4i8_to_4i64_extract: 2076; AVX512: # %bb.0: 2077; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0 2078; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 2079; AVX512-NEXT: vzeroupper 2080; 
AVX512-NEXT: retq 2081; 2082; X86-SSE2-LABEL: load_sext_4i8_to_4i64_extract: 2083; X86-SSE2: # %bb.0: 2084; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2085; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2086; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2087; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2088; X86-SSE2-NEXT: psrad $24, %xmm0 2089; X86-SSE2-NEXT: pxor %xmm1, %xmm1 2090; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2091; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2092; X86-SSE2-NEXT: retl 2093; 2094; X86-SSE41-LABEL: load_sext_4i8_to_4i64_extract: 2095; X86-SSE41: # %bb.0: 2096; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2097; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm0 2098; X86-SSE41-NEXT: retl 2099 %ld = load <4 x i8>, <4 x i8>* %ptr 2100 %sext = sext <4 x i8> %ld to <4 x i64> 2101 %extract = shufflevector <4 x i64> %sext, <4 x i64> undef, <2 x i32> <i32 2, i32 3> 2102 ret <2 x i64> %extract 2103} 2104 2105define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { 2106; SSE-LABEL: load_sext_8i1_to_8i16: 2107; SSE: # %bb.0: # %entry 2108; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2109; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2110; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2111; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2112; SSE-NEXT: pand %xmm1, %xmm0 2113; SSE-NEXT: pcmpeqw %xmm1, %xmm0 2114; SSE-NEXT: retq 2115; 2116; AVX1-LABEL: load_sext_8i1_to_8i16: 2117; AVX1: # %bb.0: # %entry 2118; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2119; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2120; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2121; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2122; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 2123; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 2124; AVX1-NEXT: retq 2125; 2126; AVX2-LABEL: load_sext_8i1_to_8i16: 2127; AVX2: # %bb.0: # %entry 2128; AVX2-NEXT: 
vpbroadcastb (%rdi), %xmm0 2129; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2130; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2131; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 2132; AVX2-NEXT: retq 2133; 2134; AVX512F-LABEL: load_sext_8i1_to_8i16: 2135; AVX512F: # %bb.0: # %entry 2136; AVX512F-NEXT: kmovw (%rdi), %k1 2137; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2138; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 2139; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2140; AVX512F-NEXT: vzeroupper 2141; AVX512F-NEXT: retq 2142; 2143; AVX512BW-LABEL: load_sext_8i1_to_8i16: 2144; AVX512BW: # %bb.0: # %entry 2145; AVX512BW-NEXT: kmovw (%rdi), %k0 2146; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 2147; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2148; AVX512BW-NEXT: vzeroupper 2149; AVX512BW-NEXT: retq 2150; 2151; X86-SSE-LABEL: load_sext_8i1_to_8i16: 2152; X86-SSE: # %bb.0: # %entry 2153; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2154; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2155; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2156; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2157; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2158; X86-SSE-NEXT: pand %xmm1, %xmm0 2159; X86-SSE-NEXT: pcmpeqw %xmm1, %xmm0 2160; X86-SSE-NEXT: retl 2161entry: 2162 %X = load <8 x i1>, <8 x i1>* %ptr 2163 %Y = sext <8 x i1> %X to <8 x i16> 2164 ret <8 x i16> %Y 2165} 2166 2167define <8 x i16> @load_sext_8i8_to_8i16(<8 x i8> *%ptr) { 2168; SSE2-LABEL: load_sext_8i8_to_8i16: 2169; SSE2: # %bb.0: # %entry 2170; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2171; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2172; SSE2-NEXT: psraw $8, %xmm0 2173; SSE2-NEXT: retq 2174; 2175; SSSE3-LABEL: load_sext_8i8_to_8i16: 2176; SSSE3: # %bb.0: # %entry 2177; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2178; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2179; SSSE3-NEXT: 
psraw $8, %xmm0 2180; SSSE3-NEXT: retq 2181; 2182; SSE41-LABEL: load_sext_8i8_to_8i16: 2183; SSE41: # %bb.0: # %entry 2184; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 2185; SSE41-NEXT: retq 2186; 2187; AVX-LABEL: load_sext_8i8_to_8i16: 2188; AVX: # %bb.0: # %entry 2189; AVX-NEXT: vpmovsxbw (%rdi), %xmm0 2190; AVX-NEXT: retq 2191; 2192; X86-SSE2-LABEL: load_sext_8i8_to_8i16: 2193; X86-SSE2: # %bb.0: # %entry 2194; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2195; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2196; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2197; X86-SSE2-NEXT: psraw $8, %xmm0 2198; X86-SSE2-NEXT: retl 2199; 2200; X86-SSE41-LABEL: load_sext_8i8_to_8i16: 2201; X86-SSE41: # %bb.0: # %entry 2202; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2203; X86-SSE41-NEXT: pmovsxbw (%eax), %xmm0 2204; X86-SSE41-NEXT: retl 2205entry: 2206 %X = load <8 x i8>, <8 x i8>* %ptr 2207 %Y = sext <8 x i8> %X to <8 x i16> 2208 ret <8 x i16> %Y 2209} 2210 2211define <8 x i64> @load_sext_8i8_to_8i64(<8 x i8> *%ptr) { 2212; SSE2-LABEL: load_sext_8i8_to_8i64: 2213; SSE2: # %bb.0: # %entry 2214; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2215; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2216; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2217; SSE2-NEXT: psrad $24, %xmm1 2218; SSE2-NEXT: pxor %xmm4, %xmm4 2219; SSE2-NEXT: pxor %xmm3, %xmm3 2220; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 2221; SSE2-NEXT: movdqa %xmm1, %xmm0 2222; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 2223; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2224; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2225; SSE2-NEXT: psrad $24, %xmm3 2226; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 2227; SSE2-NEXT: movdqa %xmm3, %xmm2 2228; 
SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 2229; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 2230; SSE2-NEXT: retq 2231; 2232; SSSE3-LABEL: load_sext_8i8_to_8i64: 2233; SSSE3: # %bb.0: # %entry 2234; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2235; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2236; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2237; SSSE3-NEXT: psrad $24, %xmm1 2238; SSSE3-NEXT: pxor %xmm4, %xmm4 2239; SSSE3-NEXT: pxor %xmm3, %xmm3 2240; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 2241; SSSE3-NEXT: movdqa %xmm1, %xmm0 2242; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 2243; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2244; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2245; SSSE3-NEXT: psrad $24, %xmm3 2246; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 2247; SSSE3-NEXT: movdqa %xmm3, %xmm2 2248; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 2249; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 2250; SSSE3-NEXT: retq 2251; 2252; SSE41-LABEL: load_sext_8i8_to_8i64: 2253; SSE41: # %bb.0: # %entry 2254; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 2255; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 2256; SSE41-NEXT: pmovsxbq 4(%rdi), %xmm2 2257; SSE41-NEXT: pmovsxbq 6(%rdi), %xmm3 2258; SSE41-NEXT: retq 2259; 2260; AVX1-LABEL: load_sext_8i8_to_8i64: 2261; AVX1: # %bb.0: # %entry 2262; AVX1-NEXT: vpmovsxbq 6(%rdi), %xmm1 2263; AVX1-NEXT: vpmovsxbq 4(%rdi), %xmm2 2264; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0 2265; AVX1-NEXT: vpmovsxbq (%rdi), %xmm3 2266; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 2267; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 2268; AVX1-NEXT: retq 2269; 2270; AVX2-LABEL: load_sext_8i8_to_8i64: 
2271; AVX2: # %bb.0: # %entry 2272; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 2273; AVX2-NEXT: vpmovsxbq 4(%rdi), %ymm1 2274; AVX2-NEXT: retq 2275; 2276; AVX512-LABEL: load_sext_8i8_to_8i64: 2277; AVX512: # %bb.0: # %entry 2278; AVX512-NEXT: vpmovsxbq (%rdi), %zmm0 2279; AVX512-NEXT: retq 2280; 2281; X86-SSE2-LABEL: load_sext_8i8_to_8i64: 2282; X86-SSE2: # %bb.0: # %entry 2283; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2284; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2285; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2286; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2287; X86-SSE2-NEXT: psrad $24, %xmm1 2288; X86-SSE2-NEXT: pxor %xmm4, %xmm4 2289; X86-SSE2-NEXT: pxor %xmm3, %xmm3 2290; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 2291; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2292; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 2293; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2294; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2295; X86-SSE2-NEXT: psrad $24, %xmm3 2296; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 2297; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 2298; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 2299; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 2300; X86-SSE2-NEXT: retl 2301; 2302; X86-SSE41-LABEL: load_sext_8i8_to_8i64: 2303; X86-SSE41: # %bb.0: # %entry 2304; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2305; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0 2306; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 2307; X86-SSE41-NEXT: pmovsxbq 4(%eax), %xmm2 2308; X86-SSE41-NEXT: pmovsxbq 6(%eax), %xmm3 2309; X86-SSE41-NEXT: retl 2310entry: 2311 %X = load <8 x i8>, <8 x i8>* %ptr 2312 %Y = sext <8 x i8> %X to <8 x i64> 2313 ret <8 x i64> %Y 2314} 
2315 2316define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { 2317; SSE-LABEL: load_sext_8i1_to_8i32: 2318; SSE: # %bb.0: # %entry 2319; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2320; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2321; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8] 2322; SSE-NEXT: movdqa %xmm1, %xmm0 2323; SSE-NEXT: pand %xmm2, %xmm0 2324; SSE-NEXT: pcmpeqd %xmm2, %xmm0 2325; SSE-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128] 2326; SSE-NEXT: pand %xmm2, %xmm1 2327; SSE-NEXT: pcmpeqd %xmm2, %xmm1 2328; SSE-NEXT: retq 2329; 2330; AVX1-LABEL: load_sext_8i1_to_8i32: 2331; AVX1: # %bb.0: # %entry 2332; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2333; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] 2334; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2335; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 2336; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2337; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2338; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2339; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2340; AVX1-NEXT: retq 2341; 2342; AVX2-LABEL: load_sext_8i1_to_8i32: 2343; AVX2: # %bb.0: # %entry 2344; AVX2-NEXT: vpbroadcastb (%rdi), %ymm0 2345; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128] 2346; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 2347; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 2348; AVX2-NEXT: retq 2349; 2350; AVX512-LABEL: load_sext_8i1_to_8i32: 2351; AVX512: # %bb.0: # %entry 2352; AVX512-NEXT: kmovw (%rdi), %k1 2353; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2354; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2355; AVX512-NEXT: retq 2356; 2357; X86-SSE-LABEL: load_sext_8i1_to_8i32: 2358; X86-SSE: # %bb.0: # %entry 2359; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2360; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2361; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2362; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = 
[1,2,4,8] 2363; X86-SSE-NEXT: movdqa %xmm1, %xmm0 2364; X86-SSE-NEXT: pand %xmm2, %xmm0 2365; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm0 2366; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128] 2367; X86-SSE-NEXT: pand %xmm2, %xmm1 2368; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm1 2369; X86-SSE-NEXT: retl 2370entry: 2371 %X = load <8 x i1>, <8 x i1>* %ptr 2372 %Y = sext <8 x i1> %X to <8 x i32> 2373 ret <8 x i32> %Y 2374} 2375 2376define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) { 2377; SSE2-LABEL: load_sext_8i8_to_8i32: 2378; SSE2: # %bb.0: # %entry 2379; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2380; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2381; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2382; SSE2-NEXT: psrad $24, %xmm0 2383; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2384; SSE2-NEXT: psrad $24, %xmm1 2385; SSE2-NEXT: retq 2386; 2387; SSSE3-LABEL: load_sext_8i8_to_8i32: 2388; SSSE3: # %bb.0: # %entry 2389; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2390; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2391; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2392; SSSE3-NEXT: psrad $24, %xmm0 2393; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2394; SSSE3-NEXT: psrad $24, %xmm1 2395; SSSE3-NEXT: retq 2396; 2397; SSE41-LABEL: load_sext_8i8_to_8i32: 2398; SSE41: # %bb.0: # %entry 2399; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 2400; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1 2401; SSE41-NEXT: retq 2402; 2403; AVX1-LABEL: load_sext_8i8_to_8i32: 2404; AVX1: # %bb.0: # %entry 2405; AVX1-NEXT: vpmovsxbd 4(%rdi), %xmm0 2406; AVX1-NEXT: vpmovsxbd (%rdi), %xmm1 2407; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 
2408; AVX1-NEXT: retq 2409; 2410; AVX2-LABEL: load_sext_8i8_to_8i32: 2411; AVX2: # %bb.0: # %entry 2412; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0 2413; AVX2-NEXT: retq 2414; 2415; AVX512-LABEL: load_sext_8i8_to_8i32: 2416; AVX512: # %bb.0: # %entry 2417; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0 2418; AVX512-NEXT: retq 2419; 2420; X86-SSE2-LABEL: load_sext_8i8_to_8i32: 2421; X86-SSE2: # %bb.0: # %entry 2422; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2423; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2424; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2425; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2426; X86-SSE2-NEXT: psrad $24, %xmm0 2427; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2428; X86-SSE2-NEXT: psrad $24, %xmm1 2429; X86-SSE2-NEXT: retl 2430; 2431; X86-SSE41-LABEL: load_sext_8i8_to_8i32: 2432; X86-SSE41: # %bb.0: # %entry 2433; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2434; X86-SSE41-NEXT: pmovsxbd (%eax), %xmm0 2435; X86-SSE41-NEXT: pmovsxbd 4(%eax), %xmm1 2436; X86-SSE41-NEXT: retl 2437entry: 2438 %X = load <8 x i8>, <8 x i8>* %ptr 2439 %Y = sext <8 x i8> %X to <8 x i32> 2440 ret <8 x i32> %Y 2441} 2442 2443define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone { 2444; SSE2-LABEL: load_sext_16i1_to_16i8: 2445; SSE2: # %bb.0: # %entry 2446; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2447; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2448; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7] 2449; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2450; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2451; SSE2-NEXT: pand %xmm1, %xmm0 2452; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 2453; SSE2-NEXT: retq 2454; 2455; SSSE3-LABEL: load_sext_16i1_to_16i8: 2456; SSSE3: # %bb.0: # 
%entry 2457; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2458; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2459; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2460; SSSE3-NEXT: pand %xmm1, %xmm0 2461; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0 2462; SSSE3-NEXT: retq 2463; 2464; SSE41-LABEL: load_sext_16i1_to_16i8: 2465; SSE41: # %bb.0: # %entry 2466; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2467; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2468; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2469; SSE41-NEXT: pand %xmm1, %xmm0 2470; SSE41-NEXT: pcmpeqb %xmm1, %xmm0 2471; SSE41-NEXT: retq 2472; 2473; AVX1-LABEL: load_sext_16i1_to_16i8: 2474; AVX1: # %bb.0: # %entry 2475; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2476; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2477; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] 2478; AVX1-NEXT: # xmm1 = mem[0,0] 2479; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 2480; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 2481; AVX1-NEXT: retq 2482; 2483; AVX2-LABEL: load_sext_16i1_to_16i8: 2484; AVX2: # %bb.0: # %entry 2485; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2486; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2487; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] 2488; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2489; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 2490; AVX2-NEXT: retq 2491; 2492; AVX512F-LABEL: load_sext_16i1_to_16i8: 2493; AVX512F: # %bb.0: # %entry 2494; AVX512F-NEXT: kmovw (%rdi), %k1 2495; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2496; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 2497; AVX512F-NEXT: vzeroupper 2498; AVX512F-NEXT: retq 2499; 2500; AVX512BW-LABEL: load_sext_16i1_to_16i8: 2501; AVX512BW: # %bb.0: # %entry 2502; AVX512BW-NEXT: kmovw 
(%rdi), %k0 2503; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 2504; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2505; AVX512BW-NEXT: vzeroupper 2506; AVX512BW-NEXT: retq 2507; 2508; X86-SSE2-LABEL: load_sext_16i1_to_16i8: 2509; X86-SSE2: # %bb.0: # %entry 2510; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2511; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2512; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2513; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7] 2514; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2515; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2516; X86-SSE2-NEXT: pand %xmm1, %xmm0 2517; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 2518; X86-SSE2-NEXT: retl 2519; 2520; X86-SSE41-LABEL: load_sext_16i1_to_16i8: 2521; X86-SSE41: # %bb.0: # %entry 2522; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2523; X86-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2524; X86-SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2525; X86-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2526; X86-SSE41-NEXT: pand %xmm1, %xmm0 2527; X86-SSE41-NEXT: pcmpeqb %xmm1, %xmm0 2528; X86-SSE41-NEXT: retl 2529entry: 2530 %X = load <16 x i1>, <16 x i1>* %ptr 2531 %Y = sext <16 x i1> %X to <16 x i8> 2532 ret <16 x i8> %Y 2533} 2534 2535define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { 2536; SSE-LABEL: load_sext_16i1_to_16i16: 2537; SSE: # %bb.0: # %entry 2538; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2539; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2540; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2541; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128] 2542; SSE-NEXT: movdqa %xmm1, %xmm0 2543; SSE-NEXT: pand %xmm2, %xmm0 2544; SSE-NEXT: pcmpeqw %xmm2, %xmm0 2545; SSE-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768] 2546; SSE-NEXT: pand %xmm2, %xmm1 2547; 
SSE-NEXT: pcmpeqw %xmm2, %xmm1 2548; SSE-NEXT: retq 2549; 2550; AVX1-LABEL: load_sext_16i1_to_16i16: 2551; AVX1: # %bb.0: # %entry 2552; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2553; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2554; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2555; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 2556; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 2557; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2558; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2559; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2560; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2561; AVX1-NEXT: retq 2562; 2563; AVX2-LABEL: load_sext_16i1_to_16i16: 2564; AVX2: # %bb.0: # %entry 2565; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0 2566; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] 2567; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 2568; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 2569; AVX2-NEXT: retq 2570; 2571; AVX512F-LABEL: load_sext_16i1_to_16i16: 2572; AVX512F: # %bb.0: # %entry 2573; AVX512F-NEXT: kmovw (%rdi), %k1 2574; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2575; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 2576; AVX512F-NEXT: retq 2577; 2578; AVX512BW-LABEL: load_sext_16i1_to_16i16: 2579; AVX512BW: # %bb.0: # %entry 2580; AVX512BW-NEXT: kmovw (%rdi), %k0 2581; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 2582; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2583; AVX512BW-NEXT: retq 2584; 2585; X86-SSE-LABEL: load_sext_16i1_to_16i16: 2586; X86-SSE: # %bb.0: # %entry 2587; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2588; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2589; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2590; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2591; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128] 2592; X86-SSE-NEXT: movdqa %xmm1, %xmm0 2593; X86-SSE-NEXT: pand 
%xmm2, %xmm0 2594; X86-SSE-NEXT: pcmpeqw %xmm2, %xmm0 2595; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768] 2596; X86-SSE-NEXT: pand %xmm2, %xmm1 2597; X86-SSE-NEXT: pcmpeqw %xmm2, %xmm1 2598; X86-SSE-NEXT: retl 2599entry: 2600 %X = load <16 x i1>, <16 x i1>* %ptr 2601 %Y = sext <16 x i1> %X to <16 x i16> 2602 ret <16 x i16> %Y 2603} 2604 2605define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone { 2606; SSE-LABEL: load_sext_32i1_to_32i8: 2607; SSE: # %bb.0: # %entry 2608; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2609; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2610; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7] 2611; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2612; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2613; SSE-NEXT: pand %xmm2, %xmm0 2614; SSE-NEXT: pcmpeqb %xmm2, %xmm0 2615; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7] 2616; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 2617; SSE-NEXT: pand %xmm2, %xmm1 2618; SSE-NEXT: pcmpeqb %xmm2, %xmm1 2619; SSE-NEXT: retq 2620; 2621; AVX1-LABEL: load_sext_32i1_to_32i8: 2622; AVX1: # %bb.0: # %entry 2623; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2624; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2625; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7] 2626; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7] 2627; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2628; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5] 2629; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 2630; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2631; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745] 2632; AVX1-NEXT: # xmm2 = mem[0,0] 2633; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 2634; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 2635; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2636; 
AVX1-NEXT: retq 2637; 2638; AVX2-LABEL: load_sext_32i1_to_32i8: 2639; AVX2: # %bb.0: # %entry 2640; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2641; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2642; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19] 2643; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] 2644; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 2645; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 2646; AVX2-NEXT: retq 2647; 2648; AVX512F-LABEL: load_sext_32i1_to_32i8: 2649; AVX512F: # %bb.0: # %entry 2650; AVX512F-NEXT: kmovw (%rdi), %k1 2651; AVX512F-NEXT: kmovw 2(%rdi), %k2 2652; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2653; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 2654; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} 2655; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 2656; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2657; AVX512F-NEXT: retq 2658; 2659; AVX512BW-LABEL: load_sext_32i1_to_32i8: 2660; AVX512BW: # %bb.0: # %entry 2661; AVX512BW-NEXT: kmovd (%rdi), %k0 2662; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 2663; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2664; AVX512BW-NEXT: retq 2665; 2666; X86-SSE-LABEL: load_sext_32i1_to_32i8: 2667; X86-SSE: # %bb.0: # %entry 2668; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2669; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2670; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2671; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7] 2672; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2673; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2674; X86-SSE-NEXT: pand %xmm2, %xmm0 2675; X86-SSE-NEXT: pcmpeqb %xmm2, %xmm0 2676; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7] 2677; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 2678; 
X86-SSE-NEXT: pand %xmm2, %xmm1 2679; X86-SSE-NEXT: pcmpeqb %xmm2, %xmm1 2680; X86-SSE-NEXT: retl 2681entry: 2682 %X = load <32 x i1>, <32 x i1>* %ptr 2683 %Y = sext <32 x i1> %X to <32 x i8> 2684 ret <32 x i8> %Y 2685} 2686 2687define <16 x i16> @load_sext_16i8_to_16i16(<16 x i8> *%ptr) { 2688; SSE2-LABEL: load_sext_16i8_to_16i16: 2689; SSE2: # %bb.0: # %entry 2690; SSE2-NEXT: movdqa (%rdi), %xmm1 2691; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2692; SSE2-NEXT: psraw $8, %xmm0 2693; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2694; SSE2-NEXT: psraw $8, %xmm1 2695; SSE2-NEXT: retq 2696; 2697; SSSE3-LABEL: load_sext_16i8_to_16i16: 2698; SSSE3: # %bb.0: # %entry 2699; SSSE3-NEXT: movdqa (%rdi), %xmm1 2700; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2701; SSSE3-NEXT: psraw $8, %xmm0 2702; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2703; SSSE3-NEXT: psraw $8, %xmm1 2704; SSSE3-NEXT: retq 2705; 2706; SSE41-LABEL: load_sext_16i8_to_16i16: 2707; SSE41: # %bb.0: # %entry 2708; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 2709; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1 2710; SSE41-NEXT: retq 2711; 2712; AVX1-LABEL: load_sext_16i8_to_16i16: 2713; AVX1: # %bb.0: # %entry 2714; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm0 2715; AVX1-NEXT: vpmovsxbw (%rdi), %xmm1 2716; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2717; AVX1-NEXT: retq 2718; 2719; AVX2-LABEL: load_sext_16i8_to_16i16: 2720; AVX2: # %bb.0: # %entry 2721; AVX2-NEXT: vpmovsxbw (%rdi), %ymm0 2722; AVX2-NEXT: retq 2723; 2724; AVX512-LABEL: load_sext_16i8_to_16i16: 2725; AVX512: # %bb.0: # %entry 2726; AVX512-NEXT: vpmovsxbw (%rdi), %ymm0 2727; AVX512-NEXT: retq 2728; 2729; X86-SSE2-LABEL: load_sext_16i8_to_16i16: 
2730; X86-SSE2: # %bb.0: # %entry 2731; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2732; X86-SSE2-NEXT: movdqa (%eax), %xmm1 2733; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2734; X86-SSE2-NEXT: psraw $8, %xmm0 2735; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2736; X86-SSE2-NEXT: psraw $8, %xmm1 2737; X86-SSE2-NEXT: retl 2738; 2739; X86-SSE41-LABEL: load_sext_16i8_to_16i16: 2740; X86-SSE41: # %bb.0: # %entry 2741; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2742; X86-SSE41-NEXT: pmovsxbw (%eax), %xmm0 2743; X86-SSE41-NEXT: pmovsxbw 8(%eax), %xmm1 2744; X86-SSE41-NEXT: retl 2745entry: 2746 %X = load <16 x i8>, <16 x i8>* %ptr 2747 %Y = sext <16 x i8> %X to <16 x i16> 2748 ret <16 x i16> %Y 2749} 2750 2751define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) { 2752; SSE2-LABEL: load_sext_2i16_to_2i64: 2753; SSE2: # %bb.0: # %entry 2754; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2755; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 2756; SSE2-NEXT: pxor %xmm1, %xmm1 2757; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2758; SSE2-NEXT: psrad $16, %xmm0 2759; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2760; SSE2-NEXT: retq 2761; 2762; SSSE3-LABEL: load_sext_2i16_to_2i64: 2763; SSSE3: # %bb.0: # %entry 2764; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2765; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 2766; SSSE3-NEXT: pxor %xmm1, %xmm1 2767; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 2768; SSSE3-NEXT: psrad $16, %xmm0 2769; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2770; SSSE3-NEXT: retq 2771; 2772; SSE41-LABEL: load_sext_2i16_to_2i64: 2773; SSE41: # %bb.0: # %entry 2774; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 2775; SSE41-NEXT: retq 2776; 2777; AVX-LABEL: load_sext_2i16_to_2i64: 2778; AVX: # %bb.0: # %entry 2779; AVX-NEXT: 
vpmovsxwq (%rdi), %xmm0 2780; AVX-NEXT: retq 2781; 2782; X86-SSE2-LABEL: load_sext_2i16_to_2i64: 2783; X86-SSE2: # %bb.0: # %entry 2784; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2785; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2786; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 2787; X86-SSE2-NEXT: pxor %xmm1, %xmm1 2788; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2789; X86-SSE2-NEXT: psrad $16, %xmm0 2790; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2791; X86-SSE2-NEXT: retl 2792; 2793; X86-SSE41-LABEL: load_sext_2i16_to_2i64: 2794; X86-SSE41: # %bb.0: # %entry 2795; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2796; X86-SSE41-NEXT: pmovsxwq (%eax), %xmm0 2797; X86-SSE41-NEXT: retl 2798entry: 2799 %X = load <2 x i16>, <2 x i16>* %ptr 2800 %Y = sext <2 x i16> %X to <2 x i64> 2801 ret <2 x i64> %Y 2802} 2803 2804define <4 x i32> @load_sext_4i16_to_4i32(<4 x i16> *%ptr) { 2805; SSE2-LABEL: load_sext_4i16_to_4i32: 2806; SSE2: # %bb.0: # %entry 2807; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2808; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2809; SSE2-NEXT: psrad $16, %xmm0 2810; SSE2-NEXT: retq 2811; 2812; SSSE3-LABEL: load_sext_4i16_to_4i32: 2813; SSSE3: # %bb.0: # %entry 2814; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2815; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2816; SSSE3-NEXT: psrad $16, %xmm0 2817; SSSE3-NEXT: retq 2818; 2819; SSE41-LABEL: load_sext_4i16_to_4i32: 2820; SSE41: # %bb.0: # %entry 2821; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 2822; SSE41-NEXT: retq 2823; 2824; AVX-LABEL: load_sext_4i16_to_4i32: 2825; AVX: # %bb.0: # %entry 2826; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 2827; AVX-NEXT: retq 2828; 2829; X86-SSE2-LABEL: load_sext_4i16_to_4i32: 2830; X86-SSE2: # %bb.0: # %entry 2831; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2832; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2833; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2834; X86-SSE2-NEXT: psrad 
$16, %xmm0 2835; X86-SSE2-NEXT: retl 2836; 2837; X86-SSE41-LABEL: load_sext_4i16_to_4i32: 2838; X86-SSE41: # %bb.0: # %entry 2839; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2840; X86-SSE41-NEXT: pmovsxwd (%eax), %xmm0 2841; X86-SSE41-NEXT: retl 2842entry: 2843 %X = load <4 x i16>, <4 x i16>* %ptr 2844 %Y = sext <4 x i16> %X to <4 x i32> 2845 ret <4 x i32> %Y 2846} 2847 2848define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) { 2849; SSE2-LABEL: load_sext_4i16_to_4i64: 2850; SSE2: # %bb.0: # %entry 2851; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2852; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2853; SSE2-NEXT: psrad $16, %xmm1 2854; SSE2-NEXT: pxor %xmm2, %xmm2 2855; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 2856; SSE2-NEXT: movdqa %xmm1, %xmm0 2857; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2858; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2859; SSE2-NEXT: retq 2860; 2861; SSSE3-LABEL: load_sext_4i16_to_4i64: 2862; SSSE3: # %bb.0: # %entry 2863; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2864; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2865; SSSE3-NEXT: psrad $16, %xmm1 2866; SSSE3-NEXT: pxor %xmm2, %xmm2 2867; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 2868; SSSE3-NEXT: movdqa %xmm1, %xmm0 2869; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2870; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2871; SSSE3-NEXT: retq 2872; 2873; SSE41-LABEL: load_sext_4i16_to_4i64: 2874; SSE41: # %bb.0: # %entry 2875; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 2876; SSE41-NEXT: pmovsxwq 4(%rdi), %xmm1 2877; SSE41-NEXT: retq 2878; 2879; AVX1-LABEL: load_sext_4i16_to_4i64: 2880; AVX1: # %bb.0: # %entry 2881; AVX1-NEXT: vpmovsxwq 4(%rdi), %xmm0 2882; AVX1-NEXT: vpmovsxwq (%rdi), %xmm1 2883; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2884; AVX1-NEXT: retq 2885; 2886; AVX2-LABEL: 
load_sext_4i16_to_4i64: 2887; AVX2: # %bb.0: # %entry 2888; AVX2-NEXT: vpmovsxwq (%rdi), %ymm0 2889; AVX2-NEXT: retq 2890; 2891; AVX512-LABEL: load_sext_4i16_to_4i64: 2892; AVX512: # %bb.0: # %entry 2893; AVX512-NEXT: vpmovsxwq (%rdi), %ymm0 2894; AVX512-NEXT: retq 2895; 2896; X86-SSE2-LABEL: load_sext_4i16_to_4i64: 2897; X86-SSE2: # %bb.0: # %entry 2898; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2899; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2900; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2901; X86-SSE2-NEXT: psrad $16, %xmm1 2902; X86-SSE2-NEXT: pxor %xmm2, %xmm2 2903; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 2904; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2905; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2906; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2907; X86-SSE2-NEXT: retl 2908; 2909; X86-SSE41-LABEL: load_sext_4i16_to_4i64: 2910; X86-SSE41: # %bb.0: # %entry 2911; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2912; X86-SSE41-NEXT: pmovsxwq (%eax), %xmm0 2913; X86-SSE41-NEXT: pmovsxwq 4(%eax), %xmm1 2914; X86-SSE41-NEXT: retl 2915entry: 2916 %X = load <4 x i16>, <4 x i16>* %ptr 2917 %Y = sext <4 x i16> %X to <4 x i64> 2918 ret <4 x i64> %Y 2919} 2920 2921define <8 x i32> @load_sext_8i16_to_8i32(<8 x i16> *%ptr) { 2922; SSE2-LABEL: load_sext_8i16_to_8i32: 2923; SSE2: # %bb.0: # %entry 2924; SSE2-NEXT: movdqa (%rdi), %xmm1 2925; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2926; SSE2-NEXT: psrad $16, %xmm0 2927; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2928; SSE2-NEXT: psrad $16, %xmm1 2929; SSE2-NEXT: retq 2930; 2931; SSSE3-LABEL: load_sext_8i16_to_8i32: 2932; SSSE3: # %bb.0: # %entry 2933; SSSE3-NEXT: movdqa (%rdi), %xmm1 2934; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2935; SSSE3-NEXT: psrad $16, %xmm0 2936; 
SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2937; SSSE3-NEXT: psrad $16, %xmm1 2938; SSSE3-NEXT: retq 2939; 2940; SSE41-LABEL: load_sext_8i16_to_8i32: 2941; SSE41: # %bb.0: # %entry 2942; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 2943; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1 2944; SSE41-NEXT: retq 2945; 2946; AVX1-LABEL: load_sext_8i16_to_8i32: 2947; AVX1: # %bb.0: # %entry 2948; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm0 2949; AVX1-NEXT: vpmovsxwd (%rdi), %xmm1 2950; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2951; AVX1-NEXT: retq 2952; 2953; AVX2-LABEL: load_sext_8i16_to_8i32: 2954; AVX2: # %bb.0: # %entry 2955; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0 2956; AVX2-NEXT: retq 2957; 2958; AVX512-LABEL: load_sext_8i16_to_8i32: 2959; AVX512: # %bb.0: # %entry 2960; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0 2961; AVX512-NEXT: retq 2962; 2963; X86-SSE2-LABEL: load_sext_8i16_to_8i32: 2964; X86-SSE2: # %bb.0: # %entry 2965; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2966; X86-SSE2-NEXT: movdqa (%eax), %xmm1 2967; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2968; X86-SSE2-NEXT: psrad $16, %xmm0 2969; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2970; X86-SSE2-NEXT: psrad $16, %xmm1 2971; X86-SSE2-NEXT: retl 2972; 2973; X86-SSE41-LABEL: load_sext_8i16_to_8i32: 2974; X86-SSE41: # %bb.0: # %entry 2975; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2976; X86-SSE41-NEXT: pmovsxwd (%eax), %xmm0 2977; X86-SSE41-NEXT: pmovsxwd 8(%eax), %xmm1 2978; X86-SSE41-NEXT: retl 2979entry: 2980 %X = load <8 x i16>, <8 x i16>* %ptr 2981 %Y = sext <8 x i16> %X to <8 x i32> 2982 ret <8 x i32> %Y 2983} 2984 2985define <2 x i64> @load_sext_2i32_to_2i64(<2 x i32> *%ptr) { 2986; SSE2-LABEL: load_sext_2i32_to_2i64: 2987; SSE2: # %bb.0: # %entry 2988; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2989; SSE2-NEXT: pxor %xmm1, %xmm1 2990; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2991; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2992; SSE2-NEXT: retq 2993; 2994; SSSE3-LABEL: load_sext_2i32_to_2i64: 2995; SSSE3: # %bb.0: # %entry 2996; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2997; SSSE3-NEXT: pxor %xmm1, %xmm1 2998; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 2999; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3000; SSSE3-NEXT: retq 3001; 3002; SSE41-LABEL: load_sext_2i32_to_2i64: 3003; SSE41: # %bb.0: # %entry 3004; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 3005; SSE41-NEXT: retq 3006; 3007; AVX-LABEL: load_sext_2i32_to_2i64: 3008; AVX: # %bb.0: # %entry 3009; AVX-NEXT: vpmovsxdq (%rdi), %xmm0 3010; AVX-NEXT: retq 3011; 3012; X86-SSE2-LABEL: load_sext_2i32_to_2i64: 3013; X86-SSE2: # %bb.0: # %entry 3014; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 3015; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3016; X86-SSE2-NEXT: pxor %xmm1, %xmm1 3017; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 3018; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3019; X86-SSE2-NEXT: retl 3020; 3021; X86-SSE41-LABEL: load_sext_2i32_to_2i64: 3022; X86-SSE41: # %bb.0: # %entry 3023; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3024; X86-SSE41-NEXT: pmovsxdq (%eax), %xmm0 3025; X86-SSE41-NEXT: retl 3026entry: 3027 %X = load <2 x i32>, <2 x i32>* %ptr 3028 %Y = sext <2 x i32> %X to <2 x i64> 3029 ret <2 x i64> %Y 3030} 3031 3032define <4 x i64> @load_sext_4i32_to_4i64(<4 x i32> *%ptr) { 3033; SSE2-LABEL: load_sext_4i32_to_4i64: 3034; SSE2: # %bb.0: # %entry 3035; SSE2-NEXT: movdqa (%rdi), %xmm0 3036; SSE2-NEXT: pxor %xmm2, %xmm2 3037; SSE2-NEXT: pxor %xmm3, %xmm3 3038; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3039; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3040; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3041; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3042; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3043; SSE2-NEXT: retq 3044; 3045; SSSE3-LABEL: load_sext_4i32_to_4i64: 3046; SSSE3: # %bb.0: # %entry 3047; SSSE3-NEXT: 
movdqa (%rdi), %xmm0 3048; SSSE3-NEXT: pxor %xmm2, %xmm2 3049; SSSE3-NEXT: pxor %xmm3, %xmm3 3050; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 3051; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3052; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3053; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 3054; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3055; SSSE3-NEXT: retq 3056; 3057; SSE41-LABEL: load_sext_4i32_to_4i64: 3058; SSE41: # %bb.0: # %entry 3059; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 3060; SSE41-NEXT: pmovsxdq 8(%rdi), %xmm1 3061; SSE41-NEXT: retq 3062; 3063; AVX1-LABEL: load_sext_4i32_to_4i64: 3064; AVX1: # %bb.0: # %entry 3065; AVX1-NEXT: vpmovsxdq 8(%rdi), %xmm0 3066; AVX1-NEXT: vpmovsxdq (%rdi), %xmm1 3067; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3068; AVX1-NEXT: retq 3069; 3070; AVX2-LABEL: load_sext_4i32_to_4i64: 3071; AVX2: # %bb.0: # %entry 3072; AVX2-NEXT: vpmovsxdq (%rdi), %ymm0 3073; AVX2-NEXT: retq 3074; 3075; AVX512-LABEL: load_sext_4i32_to_4i64: 3076; AVX512: # %bb.0: # %entry 3077; AVX512-NEXT: vpmovsxdq (%rdi), %ymm0 3078; AVX512-NEXT: retq 3079; 3080; X86-SSE2-LABEL: load_sext_4i32_to_4i64: 3081; X86-SSE2: # %bb.0: # %entry 3082; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 3083; X86-SSE2-NEXT: movdqa (%eax), %xmm0 3084; X86-SSE2-NEXT: pxor %xmm2, %xmm2 3085; X86-SSE2-NEXT: pxor %xmm3, %xmm3 3086; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3087; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3088; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3089; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3090; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3091; X86-SSE2-NEXT: retl 3092; 3093; X86-SSE41-LABEL: load_sext_4i32_to_4i64: 3094; X86-SSE41: # %bb.0: # %entry 3095; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3096; X86-SSE41-NEXT: pmovsxdq (%eax), %xmm0 3097; X86-SSE41-NEXT: pmovsxdq 8(%eax), %xmm1 3098; X86-SSE41-NEXT: retl 3099entry: 3100 %X = load <4 x i32>, <4 x i32>* 
%ptr 3101 %Y = sext <4 x i32> %X to <4 x i64> 3102 ret <4 x i64> %Y 3103} 3104 3105define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp { 3106; SSE2-LABEL: sext_2i8_to_i32: 3107; SSE2: # %bb.0: # %entry 3108; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3109; SSE2-NEXT: psraw $8, %xmm0 3110; SSE2-NEXT: movd %xmm0, %eax 3111; SSE2-NEXT: retq 3112; 3113; SSSE3-LABEL: sext_2i8_to_i32: 3114; SSSE3: # %bb.0: # %entry 3115; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3116; SSSE3-NEXT: psraw $8, %xmm0 3117; SSSE3-NEXT: movd %xmm0, %eax 3118; SSSE3-NEXT: retq 3119; 3120; SSE41-LABEL: sext_2i8_to_i32: 3121; SSE41: # %bb.0: # %entry 3122; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 3123; SSE41-NEXT: movd %xmm0, %eax 3124; SSE41-NEXT: retq 3125; 3126; AVX-LABEL: sext_2i8_to_i32: 3127; AVX: # %bb.0: # %entry 3128; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 3129; AVX-NEXT: vmovd %xmm0, %eax 3130; AVX-NEXT: retq 3131; 3132; X86-SSE2-LABEL: sext_2i8_to_i32: 3133; X86-SSE2: # %bb.0: # %entry 3134; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3135; X86-SSE2-NEXT: psraw $8, %xmm0 3136; X86-SSE2-NEXT: movd %xmm0, %eax 3137; X86-SSE2-NEXT: retl 3138; 3139; X86-SSE41-LABEL: sext_2i8_to_i32: 3140; X86-SSE41: # %bb.0: # %entry 3141; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 3142; X86-SSE41-NEXT: movd %xmm0, %eax 3143; X86-SSE41-NEXT: retl 3144entry: 3145 %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 3146 %Ex = sext <2 x i8> %Shuf to <2 x i16> 3147 %Bc = bitcast <2 x i16> %Ex to i32 3148 ret i32 %Bc 3149} 3150 3151define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) { 3152; SSE2-LABEL: sext_4i1_to_4i64: 3153; SSE2: # %bb.0: 3154; SSE2-NEXT: pslld $31, %xmm0 3155; SSE2-NEXT: psrad $31, %xmm0 3156; SSE2-NEXT: pxor %xmm2, %xmm2 3157; SSE2-NEXT: pxor %xmm3, %xmm3 3158; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3159; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3160; SSE2-NEXT: 
punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3161; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3162; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3163; SSE2-NEXT: retq 3164; 3165; SSSE3-LABEL: sext_4i1_to_4i64: 3166; SSSE3: # %bb.0: 3167; SSSE3-NEXT: pslld $31, %xmm0 3168; SSSE3-NEXT: psrad $31, %xmm0 3169; SSSE3-NEXT: pxor %xmm2, %xmm2 3170; SSSE3-NEXT: pxor %xmm3, %xmm3 3171; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 3172; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3173; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3174; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 3175; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3176; SSSE3-NEXT: retq 3177; 3178; SSE41-LABEL: sext_4i1_to_4i64: 3179; SSE41: # %bb.0: 3180; SSE41-NEXT: pslld $31, %xmm0 3181; SSE41-NEXT: psrad $31, %xmm0 3182; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 3183; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 3184; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 3185; SSE41-NEXT: movdqa %xmm2, %xmm0 3186; SSE41-NEXT: retq 3187; 3188; AVX1-LABEL: sext_4i1_to_4i64: 3189; AVX1: # %bb.0: 3190; AVX1-NEXT: vpslld $31, %xmm0, %xmm0 3191; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 3192; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 3193; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 3194; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 3195; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3196; AVX1-NEXT: retq 3197; 3198; AVX2-LABEL: sext_4i1_to_4i64: 3199; AVX2: # %bb.0: 3200; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 3201; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 3202; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 3203; AVX2-NEXT: retq 3204; 3205; AVX512-LABEL: sext_4i1_to_4i64: 3206; AVX512: # %bb.0: 3207; AVX512-NEXT: vpslld $31, %xmm0, %xmm0 3208; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 3209; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0 3210; AVX512-NEXT: retq 3211; 3212; X86-SSE2-LABEL: sext_4i1_to_4i64: 3213; X86-SSE2: # %bb.0: 3214; X86-SSE2-NEXT: pslld $31, %xmm0 3215; X86-SSE2-NEXT: psrad $31, %xmm0 3216; X86-SSE2-NEXT: pxor 
%xmm2, %xmm2 3217; X86-SSE2-NEXT: pxor %xmm3, %xmm3 3218; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3219; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3220; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3221; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3222; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3223; X86-SSE2-NEXT: retl 3224; 3225; X86-SSE41-LABEL: sext_4i1_to_4i64: 3226; X86-SSE41: # %bb.0: 3227; X86-SSE41-NEXT: pslld $31, %xmm0 3228; X86-SSE41-NEXT: psrad $31, %xmm0 3229; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm2 3230; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 3231; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 3232; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 3233; X86-SSE41-NEXT: retl 3234 %extmask = sext <4 x i1> %mask to <4 x i64> 3235 ret <4 x i64> %extmask 3236} 3237 3238define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) { 3239; SSE2-LABEL: sext_4i8_to_4i64: 3240; SSE2: # %bb.0: 3241; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3242; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3243; SSE2-NEXT: psrad $24, %xmm1 3244; SSE2-NEXT: pxor %xmm2, %xmm2 3245; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3246; SSE2-NEXT: movdqa %xmm1, %xmm0 3247; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3248; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3249; SSE2-NEXT: retq 3250; 3251; SSSE3-LABEL: sext_4i8_to_4i64: 3252; SSSE3: # %bb.0: 3253; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3254; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3255; SSSE3-NEXT: psrad $24, %xmm1 3256; SSSE3-NEXT: pxor %xmm2, %xmm2 3257; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 3258; SSSE3-NEXT: movdqa %xmm1, %xmm0 3259; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3260; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = 
xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_4i8_to_4i64:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxbq %xmm0, %xmm2
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: pmovsxbq %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_4i8_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_4i8_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: sext_4i8_to_4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0
; AVX512-NEXT: retq
;
; X86-SSE2-LABEL: sext_4i8_to_4i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X86-SSE2-NEXT: psrad $24, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X86-SSE2-NEXT: retl
;
; X86-SSE41-LABEL: sext_4i8_to_4i64:
; X86-SSE41: # %bb.0:
; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm2
; X86-SSE41-NEXT: psrld $16, %xmm0
; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm1
; X86-SSE41-NEXT: movdqa %xmm2, %xmm0
; X86-SSE41-NEXT: retl
  %extmask = sext <4 x i8> %mask to <4 x i64>
  ret <4 x i64> %extmask
}

; Compare two <32 x i16> vectors for equality and sign-extend the resulting
; <32 x i1> mask to <32 x i8> (a true lane becomes -1, a false lane becomes 0).
; The SSE, AVX1 and AVX2 expectations compare with (v)pcmpeqw and narrow with
; (v)packsswb; with AVX512BW the compare writes a mask register that vpmovm2b
; expands to bytes.
define <32 x i8> @sext_32xi1_to_32xi8(<32 x i16> %c1, <32 x i16> %c2) nounwind {
; SSE-LABEL: sext_32xi1_to_32xi8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqw %xmm5, %xmm1
; SSE-NEXT: pcmpeqw %xmm4, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pcmpeqw %xmm7, %xmm3
; SSE-NEXT: pcmpeqw %xmm6, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: sext_32xi1_to_32xi8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_32xi1_to_32xi8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sext_32xi1_to_32xi8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: sext_32xi1_to_32xi8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
; X86-SSE-LABEL: sext_32xi1_to_32xi8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movdqa 8(%ebp), %xmm3
; X86-SSE-NEXT: pcmpeqw 40(%ebp), %xmm1
; X86-SSE-NEXT: pcmpeqw 24(%ebp), %xmm0
; X86-SSE-NEXT: packsswb %xmm1, %xmm0
; X86-SSE-NEXT: pcmpeqw 72(%ebp), %xmm3
; X86-SSE-NEXT: pcmpeqw 56(%ebp), %xmm2
; X86-SSE-NEXT: packsswb %xmm3, %xmm2
; X86-SSE-NEXT: movdqa %xmm2, %xmm1
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
  %eq = icmp eq <32 x i16> %c1, %c2
  %mask = sext <32 x i1> %eq to <32 x i8>
  ret <32 x i8> %mask
}

; Load a <2 x i8> from %addr with align 1, sign-extend it to <2 x i32> and add
; the extended vector to itself. Every configuration is expected to fetch the
; two bytes with a single movzwl before extending in a vector register.
define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) {
; SSE2-LABEL: sext_2i8_to_2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movzwl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: paddd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_2i8_to_2i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movzwl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: paddd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_2i8_to_2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movzwl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
; SSE41-NEXT: paddd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sext_2i8_to_2i32:
; AVX: # %bb.0:
; AVX-NEXT: movzwl (%rdi), %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-SSE2-LABEL: sext_2i8_to_2i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movzwl (%eax), %eax
; X86-SSE2-NEXT: movd %eax, %xmm0
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X86-SSE2-NEXT: psrad $24, %xmm0
; X86-SSE2-NEXT: paddd %xmm0, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-SSE41-LABEL: sext_2i8_to_2i32:
; X86-SSE41: # %bb.0:
; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT: movzwl (%eax), %eax
; X86-SSE41-NEXT: movd %eax, %xmm0
; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm0
; X86-SSE41-NEXT: paddd %xmm0, %xmm0
; X86-SSE41-NEXT: retl
  %bytes = load <2 x i8>, <2 x i8>* %addr, align 1
  %wide = sext <2 x i8> %bytes to <2 x i32>
  %doubled = add <2 x i32> %wide, %wide
  ret <2 x i32> %doubled
}

; Load a <4 x i17> vector through %ptr and sign-extend each element to i32.
; The expectations unpack the 17-bit fields with scalar shifts (a shll/sarl
; pair by 15 performs the sign extension in a 32-bit register) and then rebuild
; the vector. This function is not marked nounwind, and the i686 SSE4.1
; expectations contain .cfi directives around the %esi push/pop.
define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) {
; SSE2-LABEL: sext_4i17_to_4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movq (%rdi), %rax
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shll $15, %ecx
; SSE2-NEXT: sarl $15, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq $17, %rcx
; SSE2-NEXT: shll $15, %ecx
; SSE2-NEXT: sarl $15, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movl 8(%rdi), %ecx
; SSE2-NEXT: shll $13, %ecx
; SSE2-NEXT: movq %rax, %rdx
; SSE2-NEXT: shrq $51, %rdx
; SSE2-NEXT: orl %ecx, %edx
; SSE2-NEXT: shll $15, %edx
; SSE2-NEXT: sarl $15, %edx
; SSE2-NEXT: movd %edx, %xmm1
; SSE2-NEXT: shrq $34, %rax
; SSE2-NEXT: shll $15, %eax
; SSE2-NEXT: sarl $15, %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_4i17_to_4i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movq (%rdi), %rax
; SSSE3-NEXT: movl %eax, %ecx
; SSSE3-NEXT: shll $15, %ecx
; SSSE3-NEXT: sarl $15, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movq %rax, %rcx
; SSSE3-NEXT: shrq $17, %rcx
; SSSE3-NEXT: shll $15, %ecx
; SSSE3-NEXT: sarl $15, %ecx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movl 8(%rdi), %ecx
; SSSE3-NEXT: shll $13, %ecx
; SSSE3-NEXT: movq %rax, %rdx
; SSSE3-NEXT: shrq $51, %rdx
; SSSE3-NEXT: orl %ecx, %edx
; SSSE3-NEXT: shll $15, %edx
; SSSE3-NEXT: sarl $15, %edx
; SSSE3-NEXT: movd %edx, %xmm1
; SSSE3-NEXT: shrq $34, %rax
; SSSE3-NEXT: shll $15, %eax
; SSSE3-NEXT: sarl $15, %eax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_4i17_to_4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movq (%rdi), %rax
; SSE41-NEXT: movq %rax, %rcx
; SSE41-NEXT: shrq $17, %rcx
; SSE41-NEXT: shll $15, %ecx
; SSE41-NEXT: sarl $15, %ecx
; SSE41-NEXT: movl %eax, %edx
; SSE41-NEXT: shll $15, %edx
; SSE41-NEXT: sarl $15, %edx
; SSE41-NEXT: movd %edx, %xmm0
; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
; SSE41-NEXT: movq %rax, %rcx
; SSE41-NEXT: shrq $34, %rcx
; SSE41-NEXT: shll $15, %ecx
; SSE41-NEXT: sarl $15, %ecx
; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
; SSE41-NEXT: movl 8(%rdi), %ecx
; SSE41-NEXT: shll $13, %ecx
; SSE41-NEXT: shrq $51, %rax
; SSE41-NEXT: orl %ecx, %eax
; SSE41-NEXT: shll $15, %eax
; SSE41-NEXT: sarl $15, %eax
; SSE41-NEXT: pinsrd $3, %eax, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sext_4i17_to_4i32:
; AVX: # %bb.0:
; AVX-NEXT: movq (%rdi), %rax
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shrq $17, %rcx
; AVX-NEXT: shll $15, %ecx
; AVX-NEXT: sarl $15, %ecx
; AVX-NEXT: movl %eax, %edx
; AVX-NEXT: shll $15, %edx
; AVX-NEXT: sarl $15, %edx
; AVX-NEXT: vmovd %edx, %xmm0
; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shrq $34, %rcx
; AVX-NEXT: shll $15, %ecx
; AVX-NEXT: sarl $15, %ecx
; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; AVX-NEXT: movl 8(%rdi), %ecx
; AVX-NEXT: shll $13, %ecx
; AVX-NEXT: shrq $51, %rax
; AVX-NEXT: orl %ecx, %eax
; AVX-NEXT: shll $15, %eax
; AVX-NEXT: sarl $15, %eax
; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
; X86-SSE2-LABEL: sext_4i17_to_4i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl (%eax), %ecx
; X86-SSE2-NEXT: movl 4(%eax), %edx
; X86-SSE2-NEXT: movl 8(%eax), %eax
; X86-SSE2-NEXT: shldl $13, %edx, %eax
; X86-SSE2-NEXT: shll $15, %eax
; X86-SSE2-NEXT: sarl $15, %eax
; X86-SSE2-NEXT: movd %eax, %xmm0
; X86-SSE2-NEXT: movl %edx, %eax
; X86-SSE2-NEXT: shll $13, %eax
; X86-SSE2-NEXT: sarl $15, %eax
; X86-SSE2-NEXT: movd %eax, %xmm1
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE2-NEXT: shldl $15, %ecx, %edx
; X86-SSE2-NEXT: shll $15, %ecx
; X86-SSE2-NEXT: sarl $15, %ecx
; X86-SSE2-NEXT: movd %ecx, %xmm0
; X86-SSE2-NEXT: shll $15, %edx
; X86-SSE2-NEXT: sarl $15, %edx
; X86-SSE2-NEXT: movd %edx, %xmm2
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT: retl
;
; X86-SSE41-LABEL: sext_4i17_to_4i32:
; X86-SSE41: # %bb.0:
; X86-SSE41-NEXT: pushl %esi
; X86-SSE41-NEXT: .cfi_def_cfa_offset 8
; X86-SSE41-NEXT: .cfi_offset %esi, -8
; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT: movl (%eax), %ecx
; X86-SSE41-NEXT: movl 4(%eax), %edx
; X86-SSE41-NEXT: movl %edx, %esi
; X86-SSE41-NEXT: movl 8(%eax), %eax
; X86-SSE41-NEXT: shldl $13, %edx, %eax
; X86-SSE41-NEXT: shldl $15, %ecx, %edx
; X86-SSE41-NEXT: shll $15, %edx
; X86-SSE41-NEXT: sarl $15, %edx
; X86-SSE41-NEXT: shll $15, %ecx
; X86-SSE41-NEXT: sarl $15, %ecx
; X86-SSE41-NEXT: movd %ecx, %xmm0
; X86-SSE41-NEXT: pinsrd $1, %edx, %xmm0
; X86-SSE41-NEXT: shll $13, %esi
; X86-SSE41-NEXT: sarl $15, %esi
; X86-SSE41-NEXT: pinsrd $2, %esi, %xmm0
; X86-SSE41-NEXT: shll $15, %eax
; X86-SSE41-NEXT: sarl $15, %eax
; X86-SSE41-NEXT: pinsrd $3, %eax, %xmm0
; X86-SSE41-NEXT: popl %esi
; X86-SSE41-NEXT: .cfi_def_cfa_offset 4
; X86-SSE41-NEXT: retl
  %packed = load <4 x i17>, <4 x i17>* %ptr
  %wide = sext <4 x i17> %packed to <4 x i32>
  ret <4 x i32> %wide
}

; Truncate %x to i6, splat it across an <8 x i6> vector, add the per-lane
; constants 0 through 7, and sign-extend the sum to <8 x i64>. The block label
; "entry" is matched by the "# %entry" text in the expectations below, so it
; must keep that name.
define <8 x i64> @sext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_8i6_to_8i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSE2-NEXT: psllq $58, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT: psrad $26, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
; SSE2-NEXT: psllq $58, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-NEXT: psrad $26, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
; SSE2-NEXT: psllq $58, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; SSE2-NEXT: psrad $26, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
; SSE2-NEXT: psllq $58, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; SSE2-NEXT: psrad $26, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_8i6_to_8i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSSE3-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSSE3-NEXT: psllq $58, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSSE3-NEXT: psrad $26, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
; SSSE3-NEXT: psllq $58, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSSE3-NEXT: psrad $26, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
; SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
; SSSE3-NEXT: psllq $58, %xmm2
; SSSE3-NEXT: movdqa %xmm2, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; SSSE3-NEXT: psrad $26, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
; SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
; SSSE3-NEXT: psllq $58, %xmm3
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; SSSE3-NEXT: psrad $26, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_8i6_to_8i64:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE41-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; SSE41-NEXT: psllq $58, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: psrad $26, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT: psllq $58, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psrad $31, %xmm2
; SSE41-NEXT: psrad $26, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT: psllq $58, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: psrad $31, %xmm4
; SSE41-NEXT: psrad $26, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; SSE41-NEXT: psllq $58, %xmm3
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: psrad $31, %xmm4
; SSE41-NEXT: psrad $26, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_8i6_to_8i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsllw $10, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $10, %xmm0, %xmm1
; AVX1-NEXT: vpmovsxwq %xmm1, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX1-NEXT: vpmovsxwq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_8i6_to_8i64:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpsllw $10, %xmm0, %xmm0
; AVX2-NEXT: vpsraw $10, %xmm0, %xmm1
; AVX2-NEXT: vpmovsxwq %xmm1, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-NEXT: vpmovsxwq %xmm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: sext_8i6_to_8i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovd %edi, %xmm0
; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT: vpsllq $58, %zmm0, %zmm0
; AVX512-NEXT: vpsraq $58, %zmm0, %zmm0
; AVX512-NEXT: retq
;
; X86-SSE2-LABEL: sext_8i6_to_8i64:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; X86-SSE2-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; X86-SSE2-NEXT: psllq $58, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrad $31, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: psrad $26, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
; X86-SSE2-NEXT: psllq $58, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: psrad $31, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-SSE2-NEXT: psrad $26, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
; X86-SSE2-NEXT: psllq $58, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: psrad $31, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; X86-SSE2-NEXT: psrad $26, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
; X86-SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
; X86-SSE2-NEXT: psllq $58, %xmm3
; X86-SSE2-NEXT: movdqa %xmm3, %xmm4
; X86-SSE2-NEXT: psrad $31, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; X86-SSE2-NEXT: psrad $26, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; X86-SSE2-NEXT: retl
;
; X86-SSE41-LABEL: sext_8i6_to_8i64:
; X86-SSE41: # %bb.0: # %entry
; X86-SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; X86-SSE41-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; X86-SSE41-NEXT: psllq $58, %xmm0
; X86-SSE41-NEXT: movdqa %xmm0, %xmm1
; X86-SSE41-NEXT: psrad $31, %xmm1
; X86-SSE41-NEXT: psrad $26, %xmm0
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-SSE41-NEXT: psllq $58, %xmm1
; X86-SSE41-NEXT: movdqa %xmm1, %xmm2
; X86-SSE41-NEXT: psrad $31, %xmm2
; X86-SSE41-NEXT: psrad $26, %xmm1
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; X86-SSE41-NEXT: psllq $58, %xmm2
; X86-SSE41-NEXT: movdqa %xmm2, %xmm4
; X86-SSE41-NEXT: psrad $31, %xmm4
; X86-SSE41-NEXT: psrad $26, %xmm2
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
; X86-SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; X86-SSE41-NEXT: psllq $58, %xmm3
; X86-SSE41-NEXT: movdqa %xmm3, %xmm4
; X86-SSE41-NEXT: psrad $31, %xmm4
; X86-SSE41-NEXT: psrad $26, %xmm3
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; X86-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
; X86-SSE41-NEXT: retl
entry:
  %low6 = trunc i32 %x to i6
  %lane0 = insertelement <8 x i6> undef, i6 %low6, i32 0
  %splat = shufflevector <8 x i6> %lane0, <8 x i6> undef, <8 x i32> zeroinitializer
  %stepped = add <8 x i6> %splat, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7>
  %wide = sext <8 x i6> %stepped to <8 x i64>
  ret <8 x i64> %wide
}

; Zero-extend <8 x i8> to <8 x i16>, negate it with "sub nsw 0, z", and
; sign-extend the result to <8 x i32>. The AVX expectations zero-extend straight
; to 32-bit lanes (vpmovzxbd) and subtract there, with no separate sign-extend
; instruction; the SSE expectations negate in 16-bit lanes and then widen.
define <8 x i32> @zext_negate_sext(<8 x i8> %x) {
; SSE2-LABEL: zext_negate_sext:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: psubw %xmm0, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_negate_sext:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: psubw %xmm0, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSSE3-NEXT: psrad $16, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_negate_sext:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: psubw %xmm0, %xmm1
; SSE41-NEXT: pmovsxwd %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_negate_sext:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpsubd %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_negate_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_negate_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpsubd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
;
; X86-SSE2-LABEL: zext_negate_sext:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pxor %xmm1, %xmm1
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-SSE2-NEXT: psubw %xmm0, %xmm1
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-SSE2-NEXT: psrad $16, %xmm0
; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT: psrad $16, %xmm1
; X86-SSE2-NEXT: retl
;
; X86-SSE41-LABEL: zext_negate_sext:
; X86-SSE41: # %bb.0:
; X86-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-SSE41-NEXT: pxor %xmm1, %xmm1
; X86-SSE41-NEXT: psubw %xmm0, %xmm1
; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm0
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm1
; X86-SSE41-NEXT: retl
  %zx = zext <8 x i8> %x to <8 x i16>
  %negated = sub nsw <8 x i16> zeroinitializer, %zx
  %sx = sext <8 x i16> %negated to <8 x i32>
  ret <8 x i32> %sx
}

; Zero-extend <8 x i8> to <8 x i16>, add -1 to every lane (no nsw/nuw flag on
; the add), and sign-extend the result to <8 x i32>. The all-ones constant is
; expected to be materialised with (v)pcmpeqd. The "decremenet" spelling in the
; function name is matched by every LABEL check below, so it is left as is.
define <8 x i32> @zext_decremenet_sext(<8 x i8> %x) {
; SSE2-LABEL: zext_decremenet_sext:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: paddw %xmm0, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_decremenet_sext:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
; SSSE3-NEXT: paddw %xmm0, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSSE3-NEXT: psrad $16, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_decremenet_sext:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: paddw %xmm0, %xmm1
; SSE41-NEXT: pmovsxwd %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_decremenet_sext:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_decremenet_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: zext_decremenet_sext:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; X86-SSE2-LABEL: zext_decremenet_sext:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pxor %xmm1, %xmm1
; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE2-NEXT: paddw %xmm0, %xmm1
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-SSE2-NEXT: psrad $16, %xmm0
; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT: psrad $16, %xmm1
; X86-SSE2-NEXT: retl
;
; X86-SSE41-LABEL: zext_decremenet_sext:
; X86-SSE41: # %bb.0:
; X86-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; X86-SSE41-NEXT: paddw %xmm0, %xmm1
; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm0
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm1
; X86-SSE41-NEXT: retl
  %zx = zext <8 x i8> %x to <8 x i16>
  %minus1 = add <8 x i16> %zx, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %sx = sext <8 x i16> %minus1 to <8 x i32>
  ret <8 x i32> %sx
}