; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41

; Splat constant <4 x i32> multiply: SSE2 must expand to pmuludq + shuffles,
; SSE4.1 has a native 32-bit vector multiply (pmulld).
define <4 x i32> @a(<4 x i32> %i) nounwind {
; SSE2-LABEL: a:
; SSE2: movdqa {{.*}}, %[[X1:xmm[0-9]+]]
; SSE2-NEXT: pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %[[X1]], %xmm0
; SSE2-NEXT: pmuludq %[[X1]], %[[X2]]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X2]][0,2]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: a:
; SSE41: pmulld
; SSE41-NEXT: retq
entry:
  %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
  ret <4 x i32> %A
}

; <2 x i64> multiply has no native instruction on either subtarget; it is
; lowered to three pmuludq partial products on both.
define <2 x i64> @b(<2 x i64> %i) nounwind {
; ALL-LABEL: b:
; ALL: pmuludq
; ALL: pmuludq
; ALL: pmuludq
entry:
  %A = mul <2 x i64> %i, < i64 117, i64 117 >
  ret <2 x i64> %A
}

; Variable <4 x i32> multiply (register-register operands).
define <4 x i32> @c(<4 x i32> %i, <4 x i32> %j) nounwind {
; SSE2-LABEL: c:
; SSE2: pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %[[X2]], %xmm1
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: c:
; SSE41: pmulld
; SSE41-NEXT: retq
entry:
  %A = mul <4 x i32> %i, %j
  ret <4 x i32> %A
}

; Variable <2 x i64> multiply: always three pmuludq partial products.
define <2 x i64> @d(<2 x i64> %i, <2 x i64> %j) nounwind {
; ALL-LABEL: d:
; ALL: pmuludq
; ALL: pmuludq
; ALL: pmuludq
entry:
  %A = mul <2 x i64> %i, %j
  ret <2 x i64> %A
}

declare void @foo()

; Same as @c, but the call to @foo forces the vector arguments to be
; spilled to the stack, so the multiply operands are reloaded from %rsp.
define <4 x i32> @e(<4 x i32> %i, <4 x i32> %j) nounwind {
; SSE2-LABEL: e:
; SSE2: movdqa {{[0-9]*}}(%rsp), %xmm0
; SSE2-NEXT: pshufd {{.*}} # [[X1:xmm[0-9]+]] = xmm0[1,1,3,3]
; SSE2-NEXT: movdqa {{[0-9]*}}(%rsp), %[[X2:xmm[0-9]+]]
; SSE2-NEXT: pmuludq %[[X2]], %xmm0
; SSE2-NEXT: pshufd {{.*}} # [[X2]] = [[X2]][1,1,3,3]
; SSE2-NEXT: pmuludq %[[X1]], %[[X2]]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X2]][0,2]
; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: addq ${{[0-9]+}}, %rsp
; SSE2-NEXT: retq
;
; SSE41-LABEL: e:
; SSE41: pmulld {{[0-9]+}}(%rsp), %xmm
; SSE41-NEXT: addq ${{[0-9]+}}, %rsp
; SSE41-NEXT: retq
entry:
  ; Use a call to force spills.
  call void @foo()
  %A = mul <4 x i32> %i, %j
  ret <4 x i32> %A
}

; Same as @d, but with spills forced by the call.
define <2 x i64> @f(<2 x i64> %i, <2 x i64> %j) nounwind {
; ALL-LABEL: f:
; ALL: pmuludq
; ALL: pmuludq
; ALL: pmuludq
entry:
  ; Use a call to force spills.
  call void @foo()
  %A = mul <2 x i64> %i, %j
  ret <2 x i64> %A
}