; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE(review): do not hand-edit the CHECK lines below; regenerate them with
; utils/update_llc_test_checks.py after any codegen change.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2

; Just one 32-bit run to make sure we do reasonable things for i64 cases.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2

; Every call below passes %x as both concatenation operands, so fshr(x, x, amt)
; is a rotate-right of each i32 element by amt (mod 32); the checks confirm this
; lowers to rotate instructions where available (XOP vprotd, AVX512 vprorvd /
; vprord) and to shift/multiply + or sequences elsewhere.
declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)

;
; Variable Shifts
;

; Per-element, fully variable rotate amounts.
define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: var_funnnel_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm2
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: pslld $23, %xmm2
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: cvttps2dq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: var_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: psubd %xmm1, %xmm2
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: pslld $23, %xmm2
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: cvttps2dq %xmm2, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuludq %xmm2, %xmm3
; SSE41-NEXT: pmuludq %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: var_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_funnnel_v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: var_funnnel_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: var_funnnel_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: var_funnnel_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: var_funnnel_v2i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: var_funnnel_v2i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v2i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: var_funnnel_v2i32:
; XOP: # %bb.0:
; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT: retq
;
; X86-SSE2-LABEL: var_funnnel_v2i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: psubd %xmm1, %xmm2
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm2
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: por %xmm3, %xmm0
; X86-SSE2-NEXT: retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %amt)
  ret <2 x i32> %res
}

;
; Uniform Variable Shifts
;

; Rotate amount splatted from element 0 of %amt, so all lanes rotate equally.
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm2
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
; SSE2-NEXT: pslld $23, %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: psubd %xmm1, %xmm2
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
; SSE41-NEXT: pslld $23, %xmm1
; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuludq %xmm2, %xmm3
; SSE41-NEXT: pmuludq %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_funnnel_v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vpslld %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512VL-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512VLBW-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v2i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v2i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX512VLVBMI2-NEXT: vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v2i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: psubd %xmm1, %xmm2
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
; X86-SSE2-NEXT: pslld $23, %xmm1
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: por %xmm3, %xmm0
; X86-SSE2-NEXT: retl
  %splat = shufflevector <2 x i32> %amt, <2 x i32> undef, <2 x i32> zeroinitializer
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %splat)
  ret <2 x i32> %res
}

;
; Constant Shifts
;

; Distinct compile-time rotate amounts per element (4 and 5).
define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: constant_funnnel_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: constant_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuludq %xmm2, %xmm3
; SSE41-NEXT: pmuludq %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_funnnel_v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: constant_funnnel_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
; AVX512F-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: constant_funnnel_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: constant_funnnel_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
; AVX512BW-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: constant_funnnel_v2i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v2i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
; AVX512VBMI2-NEXT: vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v2i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: constant_funnnel_v2i32:
; XOP: # %bb.0:
; XOP-NEXT: vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: retq
;
; X86-SSE2-LABEL: constant_funnnel_v2i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT: por %xmm3, %xmm0
; X86-SSE2-NEXT: retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 5>)
  ret <2 x i32> %res
}

;
; Uniform Constant Shifts
;

; Same constant rotate amount (4) in every element.
define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: splatconstant_funnnel_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrld $4, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pslld $28, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: splatconstant_funnnel_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrld $4, %xmm2
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pslld $28, %xmm1
; SSE41-NEXT: por %xmm2, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatconstant_funnnel_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrld $4, %xmm0, %xmm1
; AVX1-NEXT: vpslld $28, %xmm0, %xmm2
; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatconstant_funnnel_v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatconstant_funnnel_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vprord $4, %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vprord $4, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vprord $4, %zmm0, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v2i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vprord $4, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i32:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT: vprord $4, %zmm0, %zmm0
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i32:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vprord $4, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v2i32:
; XOP: # %bb.0:
; XOP-NEXT: vprotd $28, %xmm0, %xmm0
; XOP-NEXT: retq
;
; X86-SSE2-LABEL: splatconstant_funnnel_v2i32:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psrld $4, %xmm2
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pslld $28, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X86-SSE2-NEXT: movaps %xmm1, %xmm0
; X86-SSE2-NEXT: retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
  ret <2 x i32> %res
}