; RUN: llc < %s -march=x86 -mcpu=core2 -mattr=+ssse3 | FileCheck %s
; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=CHECK-YONAH %s

; Shuffle-lowering tests for byte-wise concatenate-and-shift patterns.
; The first run (core2 with +ssse3) is matched against the default prefix; the
; second run (yonah) is matched against the YONAH prefix, whose expected
; output in this file never contains palignr.

; Single-input rotation <1,2,3,0> of %A (second operand is undef).
; Both runs expect a single pshufd.
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test1:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
; CHECK-YONAH-NEXT: retl
  %C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
  ret <4 x i32> %C
}

; Two-input mask <1,2,3,4>: elements 1-3 of %A followed by element 0 of %B.
; The SSSE3 run expects palignr plus a register copy; the yonah run expects
; two shufps.
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test2:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
; CHECK-YONAH-NEXT: retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 >
  ret <4 x i32> %C
}

; Same as test2 but with an undef lane: mask <1,2,undef,4>.
; The SSSE3 run expects the same palignr; the yonah run expects one shufps.
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test3:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
; CHECK-YONAH-NEXT: retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
  ret <4 x i32> %C
}

; Mask <6,7,undef,1>: elements 2-3 of %B, an undef lane, then element 1 of %A.
; The SSSE3 run expects palignr directly into xmm0; the yonah run expects
; shufpd plus a register copy.
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-LABEL: test4:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test4:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; CHECK-YONAH-NEXT: movapd %xmm1, %xmm0
; CHECK-YONAH-NEXT: retl
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
  ret <4 x i32> %C
}

; Float version of test4 (same mask <6,7,undef,1>).
; Both runs expect shufpd plus movapd; no palignr is expected here even with
; SSSE3 enabled.
define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK-LABEL: test5:
; CHECK: # BB#0:
; CHECK-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test5:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; CHECK-YONAH-NEXT: movapd %xmm1, %xmm0
; CHECK-YONAH-NEXT: retl
  %C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
  ret <4 x float> %C
}

; <8 x i16> mask <3,4,undef,6,7,8,9,10>: a 3-element (6-byte) shift across
; %A:%B with one undef lane.
; The SSSE3 run expects palignr; the yonah run expects psrldq/pslldq/por.
define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: test6:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test6:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
; CHECK-YONAH-NEXT: por %xmm1, %xmm0
; CHECK-YONAH-NEXT: retl
  %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
  ret <8 x i16> %C
}

; <8 x i16> mask <undef,6,undef,8,9,10,11,12>: a 5-element (10-byte) shift
; across %A:%B with two undef lanes.
; The SSSE3 run expects palignr; the yonah run expects psrldq/pslldq/por.
define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: test7:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test7:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
; CHECK-YONAH-NEXT: por %xmm1, %xmm0
; CHECK-YONAH-NEXT: retl
  %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
  ret <8 x i16> %C
}

; <16 x i8> mask starting at element 5 (with one undef lane) and running
; through element 20: a 5-byte shift across %A:%B.
; The SSSE3 run expects palignr; the yonah run expects psrldq/pslldq/por.
define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
; CHECK-LABEL: test8:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test8:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
; CHECK-YONAH-NEXT: por %xmm1, %xmm0
; CHECK-YONAH-NEXT: retl
  %C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
  ret <16 x i8> %C
}

; Check that we don't do unary (circular on single operand) palignr incorrectly.
; (It is possible, but before this testcase was committed, it was being done
; incorrectly. In particular, one of the operands of the palignr node
; was an UNDEF.)
; Note the shuffle operands are swapped (%B first) and every defined mask
; index selects from %B, so the expected palignr reads xmm1 for both halves.
define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: test9:
; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retl
;
; CHECK-YONAH-LABEL: test9:
; CHECK-YONAH: # BB#0:
; CHECK-YONAH-NEXT: movdqa %xmm1, %xmm0
; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
; CHECK-YONAH-NEXT: por %xmm0, %xmm1
; CHECK-YONAH-NEXT: movdqa %xmm1, %xmm0
; CHECK-YONAH-NEXT: retl
  %C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >
  ret <8 x i16> %C
}