; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41

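; v4i32 multiply by a splat constant. SSE2 has no 32-bit packed multiply with
; a 32-bit result, so the even and odd lanes are multiplied separately with
; pmuludq and the halves shuffled back together; SSE4.1 uses a single pmulld.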
define <4 x i32> @a(<4 x i32> %i) nounwind {
; SSE2-LABEL: a:
; SSE2:         movdqa {{.*}}, %[[X1:xmm[0-9]+]]
; SSE2-NEXT:    pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %[[X1]], %xmm0
; SSE2-NEXT:    pmuludq %[[X1]], %[[X2]]
; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2],[[X2]][0,2]
; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: a:
; SSE41:         pmulld
; SSE41-NEXT:    retq
entry:
  %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
  ret <4 x i32> %A
}

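; v2i64 multiply by a splat constant. No SSE level has a 64-bit packed
; multiply, so it is always expanded into three 32x32->64 pmuludq partial
; products.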
define <2 x i64> @b(<2 x i64> %i) nounwind {
; ALL-LABEL: b:
; ALL:         pmuludq
; ALL:         pmuludq
; ALL:         pmuludq
entry:
  %A = mul <2 x i64> %i, < i64 117, i64 117 >
  ret <2 x i64> %A
}

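; Same as @a, but with a variable operand instead of a constant splat.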
define <4 x i32> @c(<4 x i32> %i, <4 x i32> %j) nounwind {
; SSE2-LABEL: c:
; SSE2:         pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %[[X2]], %xmm1
; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: c:
; SSE41:         pmulld
; SSE41-NEXT:    retq
entry:
  %A = mul <4 x i32> %i, %j
  ret <4 x i32> %A
}

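; Same as @b, but with a variable operand instead of a constant splat.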
define <2 x i64> @d(<2 x i64> %i, <2 x i64> %j) nounwind {
; ALL-LABEL: d:
; ALL:         pmuludq
; ALL:         pmuludq
; ALL:         pmuludq
entry:
  %A = mul <2 x i64> %i, %j
  ret <2 x i64> %A
}

declare void @foo()

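; Same as @c, but the call to @foo forces both operands to be spilled, so the
; multiply has to reload them from the stack; SSE4.1 folds the reload into
; pmulld's memory operand.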
define <4 x i32> @e(<4 x i32> %i, <4 x i32> %j) nounwind {
; SSE2-LABEL: e:
; SSE2:         movdqa {{[0-9]*}}(%rsp), %xmm0
; SSE2-NEXT:    pshufd {{.*}} # [[X1:xmm[0-9]+]] = xmm0[1,1,3,3]
; SSE2-NEXT:    movdqa {{[0-9]*}}(%rsp), %[[X2:xmm[0-9]+]]
; SSE2-NEXT:    pmuludq %[[X2]], %xmm0
; SSE2-NEXT:    pshufd {{.*}} # [[X2]] = [[X2]][1,1,3,3]
; SSE2-NEXT:    pmuludq %[[X1]], %[[X2]]
; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2],[[X2]][0,2]
; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    addq ${{[0-9]+}}, %rsp
; SSE2-NEXT:    retq
;
; SSE41-LABEL: e:
; SSE41:         pmulld {{[0-9]+}}(%rsp), %xmm
; SSE41-NEXT:    addq ${{[0-9]+}}, %rsp
; SSE41-NEXT:    retq
entry:
  ; Use a call to force spills.
  call void @foo()
  %A = mul <4 x i32> %i, %j
  ret <4 x i32> %A
}

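; Same as @d, but with the operands spilled and reloaded around the call.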
define <2 x i64> @f(<2 x i64> %i, <2 x i64> %j) nounwind {
; ALL-LABEL: f:
; ALL:         pmuludq
; ALL:         pmuludq
; ALL:         pmuludq
entry:
  ; Use a call to force spills.
  call void @foo()
  %A = mul <2 x i64> %i, %j
  ret <2 x i64> %A
}