; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64

; Verify that we correctly fold target specific packed vector shifts by
; immediate count into a simple build_vector when the elements of the vector
; in input to the packed shift are all constants or undef.

; pslli.w on an all-constant vector: every i16 lane is shifted left by 3 at
; compile time, so only a constant-pool load remains.
define <8 x i16> @test1() {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 2, i16 4, i16 8>, i32 3)
  ret <8 x i16> %1
}

; psrli.w: logical right shift of constant i16 lanes folds to a constant.
define <8 x i16> @test2() {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}

; psrai.w: arithmetic right shift of constant i16 lanes folds to a constant.
define <8 x i16> @test3() {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}

; pslli.d: left shift of constant i32 lanes folds to a constant.
define <4 x i32> @test4() {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
  ret <4 x i32> %1
}

; psrli.d: logical right shift of constant i32 lanes folds to a constant.
define <4 x i32> @test5() {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}
; psrai.d: arithmetic right shift of constant i32 lanes folds to a constant.
define <4 x i32> @test6() {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}

; pslli.q: left shift of constant i64 lanes. The X86 run prints the folded
; <2 x i64> constant as four i32 elements; the X64 run prints two i64 elements.
define <2 x i64> @test7() {
; X86-LABEL: test7:
; X86:       # %bb.0:
; X86-NEXT:    movaps {{.*#+}} xmm0 = [8,0,16,0]
; X86-NEXT:    retl
;
; X64-LABEL: test7:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 1, i64 2>, i32 3)
  ret <2 x i64> %1
}

; psrli.q: logical right shift of constant i64 lanes folds to a constant.
define <2 x i64> @test8() {
; X86-LABEL: test8:
; X86:       # %bb.0:
; X86-NEXT:    movaps {{.*#+}} xmm0 = [1,0,2,0]
; X86-NEXT:    retl
;
; X64-LABEL: test8:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 8, i64 16>, i32 3)
  ret <2 x i64> %1
}

; Mixed constant/undef input: undef lanes appear as 0 in the folded constant.
define <8 x i16> @test9() {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

; psrai.d with undef lanes: undefs fold to 0.
define <4 x i32> @test10() {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [0,1,0,4]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

; psrli.q with an undef lane. Note the X64 run prints the constant as sixteen
; i8 elements here, with 31 >> 3 = 3 landing in byte 8.
define <2 x i64> @test11() {
; X86-LABEL: test11:
; X86:       # %bb.0:
; X86-NEXT:    movaps {{.*#+}} xmm0 = [0,0,3,0]
; X86-NEXT:    retl
;
; X64-LABEL: test11:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}

; Same input as test9 (psrai.w with undef lanes).
define <8 x i16> @test12() {
; CHECK-LABEL: test12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

; psrli.d with undef lanes: undefs fold to 0.
define <4 x i32> @test13() {
; CHECK-LABEL: test13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [0,1,0,4]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

; psrli.w with undef lanes: undefs fold to 0.
define <8 x i16> @test14() {
; CHECK-LABEL: test14:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

; pslli.d with undef lanes: undefs fold to 0.
define <4 x i32> @test15() {
; CHECK-LABEL: test15:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [0,64,0,256]
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

; pslli.q with an undef lane: 31 << 3 = 248 in the low i64's byte 0 (byte 8
; of the vector, as the X64 i8-element printout shows).
define <2 x i64> @test16() {
; X86-LABEL: test16:
; X86:       # %bb.0:
; X86-NEXT:    movaps {{.*#+}} xmm0 = [0,0,248,0]
; X86-NEXT:    retl
;
; X64-LABEL: test16:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,248,0,0,0,0,0,0,0]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}

; Make sure we fold fully undef input vectors. We previously folded only when
; undef had a single use so use 2 undefs.
; Fully undef input: both shifts lower to a zeroed register (xorps), one for
; the stored value and one for the returned value.
define <4 x i32> @test17(<4 x i32> %a0, <4 x i32>* %dummy) {
; X86-LABEL: test17:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    movaps %xmm0, (%eax)
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test17:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movaps %xmm0, (%rdi)
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    retq
  %a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 6)
  store <4 x i32> %a, <4 x i32>* %dummy
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 7)
  ret <4 x i32> %res
}

; Same as test17 with different shift amounts; both undef shifts still fold
; to zero vectors.
define <4 x i32> @test18(<4 x i32> %a0, <4 x i32>* %dummy) {
; X86-LABEL: test18:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    movaps %xmm0, (%eax)
; X86-NEXT:    xorps %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test18:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movaps %xmm0, (%rdi)
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    retq
  %a = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 3)
  store <4 x i32> %a, <4 x i32>* %dummy
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 1)
  ret <4 x i32> %res
}

; SSE2 immediate-count packed shift intrinsics exercised above.
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32)
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32)