; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown   -mattr=+mmx          | FileCheck %s --check-prefixes=X86,X86-MMX
; RUN: llc < %s -mtriple=i686-unknown-unknown   -mattr=+mmx,+sse2    | FileCheck %s --check-prefixes=X86,X86-SSE
; RUN: llc < %s -mtriple=i686-unknown-unknown   -mattr=+mmx,+ssse3   | FileCheck %s --check-prefixes=X86,X86-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2    | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3   | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx     | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx2    | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx512f | FileCheck %s --check-prefix=X64

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)

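; Each test builds a small vector from scalar arguments with insertelement,
; bitcasts it to x86_mmx, doubles it with llvm.x86.mmx.padd.d to keep the
; value in the MMX domain, and stores the result. The function-name suffix
; encodes the element pattern: a digit selects the matching scalar argument,
; 'z' is a zero element, and 'u' is an undef element.
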
;
; v2i32
;

define void @build_v2i32_01(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_01:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_01:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32>    %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

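; A zero upper element needs no shuffle: movd into an MMX register already
; zero-extends the 32-bit value to 64 bits.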
define void @build_v2i32_0z(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_0z:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_0z:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32>    %1, i32   0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_u1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_u1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2i32_u1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2i32_u1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 undef, i32 0
  %2 = insertelement <2 x i32>    %1, i32   %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_z1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_z1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_z1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32   0, i32 0
  %2 = insertelement <2 x i32>    %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_00(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2i32_00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2i32_00:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32>    %1, i32 %a0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

;
; v4i16
;

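; Four i16 elements are merged pairwise with punpcklwd, then the two
; resulting dword halves are joined with punpckldq.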
define void @build_v4i16_0123(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0123:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X86-NEXT:    punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0123:
; X64:       # %bb.0:
; X64-NEXT:    movd %r8d, %mm0
; X64-NEXT:    movd %ecx, %mm1
; X64-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X64-NEXT:    punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16>    %2, i16 %a2, i32 2
  %4 = insertelement <4 x i16>    %3, i16 %a3, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_01zz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_01zz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT:    pxor %mm0, %mm0
; X86-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_01zz:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT:    pxor %mm0, %mm0
; X64-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16>    %2, i16   0, i32 2
  %4 = insertelement <4 x i16>    %3, i16   0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0uuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0uuz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0uuz:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16   %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16 undef, i32 1
  %3 = insertelement <4 x i16>    %2, i16 undef, i32 2
  %4 = insertelement <4 x i16>    %3, i16     0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

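; When every upper element is zero or undef, the build collapses to a single
; zero-extending scalar move: movzwl clears bits 16-31 and movd into an MMX
; register clears bits 32-63.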
define void @build_v4i16_0zuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0zuz:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd %eax, %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0zuz:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    movd %eax, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16   %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16     0, i32 1
  %3 = insertelement <4 x i16>    %2, i16 undef, i32 2
  %4 = insertelement <4 x i16>    %3, i16     0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_012u(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_012u:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_012u:
; X64:       # %bb.0:
; X64-NEXT:    movd %ecx, %mm0
; X64-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT:    movd %edx, %mm1
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16   %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16   %a1, i32 1
  %3 = insertelement <4 x i16>    %2, i16   %a2, i32 2
  %4 = insertelement <4 x i16>    %3, i16 undef, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0u00(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-MMX-LABEL: build_v4i16_0u00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v4i16_0u00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v4i16_0u00:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16   %a0, i32 0
  %2 = insertelement <4 x i16>    %1, i16 undef, i32 1
  %3 = insertelement <4 x i16>    %2, i16   %a0, i32 2
  %4 = insertelement <4 x i16>    %3, i16   %a0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

;
; v8i8
;

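; Eight i8 elements take three merge stages: punpcklbw pairs bytes,
; punpcklwd pairs the resulting words, and punpckldq joins the dword halves.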
define void @build_v8i8_01234567(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_01234567:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm3
; X86-NEXT:    punpcklbw %mm0, %mm3 # mm3 = mm3[0],mm0[0],mm3[1],mm0[1],mm3[2],mm0[2],mm3[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1]
; X86-NEXT:    punpckldq %mm2, %mm3 # mm3 = mm3[0],mm2[0]
; X86-NEXT:    paddd %mm3, %mm3
; X86-NEXT:    movq %mm3, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_01234567:
; X64:       # %bb.0:
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %r9d, %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT:    punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    movd %r8d, %mm1
; X64-NEXT:    movd %ecx, %mm2
; X64-NEXT:    punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT:    movd %edx, %mm1
; X64-NEXT:    movd %esi, %mm3
; X64-NEXT:    punpcklbw %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1],mm3[2],mm1[2],mm3[3],mm1[3]
; X64-NEXT:    punpcklwd %mm2, %mm3 # mm3 = mm3[0],mm2[0],mm3[1],mm2[1]
; X64-NEXT:    punpckldq %mm0, %mm3 # mm3 = mm3[0],mm0[0]
; X64-NEXT:    paddd %mm3, %mm3
; X64-NEXT:    movq %mm3, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8 %a1, i32 1
  %3  = insertelement <8 x i8>    %2, i8 %a2, i32 2
  %4  = insertelement <8 x i8>    %3, i8 %a3, i32 3
  %5  = insertelement <8 x i8>    %4, i8 %a4, i32 4
  %6  = insertelement <8 x i8>    %5, i8 %a5, i32 5
  %7  = insertelement <8 x i8>    %6, i8 %a6, i32 6
  %8  = insertelement <8 x i8>    %7, i8 %a7, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0u2345z7(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0u2345z7:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT:    punpckldq %mm2, %mm0 # mm0 = mm0[0],mm2[0]
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0u2345z7:
; X64:       # %bb.0:
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %r9d, %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT:    punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    movd %r8d, %mm1
; X64-NEXT:    movd %ecx, %mm2
; X64-NEXT:    punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpcklbw %mm1, %mm1 # mm1 = mm1[0,0,1,1,2,2,3,3]
; X64-NEXT:    punpcklwd %mm2, %mm1 # mm1 = mm1[0],mm2[0],mm1[1],mm2[1]
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8   %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8 undef, i32 1
  %3  = insertelement <8 x i8>    %2, i8   %a2, i32 2
  %4  = insertelement <8 x i8>    %3, i8   %a3, i32 3
  %5  = insertelement <8 x i8>    %4, i8   %a4, i32 4
  %6  = insertelement <8 x i8>    %5, i8   %a5, i32 5
  %7  = insertelement <8 x i8>    %6, i8     0, i32 6
  %8  = insertelement <8 x i8>    %7, i8   %a7, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0123zzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0123zzzu:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    pxor %mm0, %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpcklbw %mm1, %mm1 # mm1 = mm1[0,0,1,1,2,2,3,3]
; X86-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0123zzzu:
; X64:       # %bb.0:
; X64-NEXT:    movd %r8d, %mm0
; X64-NEXT:    movd %ecx, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X64-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT:    pxor %mm0, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpcklbw %mm1, %mm1 # mm1 = mm1[0,0,1,1,2,2,3,3]
; X64-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8   %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8   %a1, i32 1
  %3  = insertelement <8 x i8>    %2, i8   %a2, i32 2
  %4  = insertelement <8 x i8>    %3, i8   %a3, i32 3
  %5  = insertelement <8 x i8>    %4, i8     0, i32 4
  %6  = insertelement <8 x i8>    %5, i8     0, i32 5
  %7  = insertelement <8 x i8>    %6, i8     0, i32 6
  %8  = insertelement <8 x i8>    %7, i8 undef, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0uuuuzzz(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0uuuuzzz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0uuuuzzz:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8   %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8 undef, i32 1
  %3  = insertelement <8 x i8>    %2, i8 undef, i32 2
  %4  = insertelement <8 x i8>    %3, i8 undef, i32 3
  %5  = insertelement <8 x i8>    %4, i8 undef, i32 4
  %6  = insertelement <8 x i8>    %5, i8     0, i32 5
  %7  = insertelement <8 x i8>    %6, i8     0, i32 6
  %8  = insertelement <8 x i8>    %7, i8     0, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0zzzzzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0zzzzzzu:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd %eax, %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0zzzzzzu:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %sil, %eax
; X64-NEXT:    movd %eax, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8   %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8     0, i32 1
  %3  = insertelement <8 x i8>    %2, i8     0, i32 2
  %4  = insertelement <8 x i8>    %3, i8     0, i32 3
  %5  = insertelement <8 x i8>    %4, i8     0, i32 4
  %6  = insertelement <8 x i8>    %5, i8     0, i32 5
  %7  = insertelement <8 x i8>    %6, i8     0, i32 6
  %8  = insertelement <8 x i8>    %7, i8 undef, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

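; Splatting a single byte uses the full punpcklbw/punpcklwd/punpckldq chain
; on mmx-only targets; with SSE available, the word and dword stages can be
; folded into a single pshufw.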
define void @build_v8i8_00000000(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-MMX-LABEL: build_v8i8_00000000:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-MMX-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v8i8_00000000:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-SSE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v8i8_00000000:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1  = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2  = insertelement <8 x i8>    %1, i8 %a0, i32 1
  %3  = insertelement <8 x i8>    %2, i8 %a0, i32 2
  %4  = insertelement <8 x i8>    %3, i8 %a0, i32 3
  %5  = insertelement <8 x i8>    %4, i8 %a0, i32 4
  %6  = insertelement <8 x i8>    %5, i8 %a0, i32 5
  %7  = insertelement <8 x i8>    %6, i8 %a0, i32 6
  %8  = insertelement <8 x i8>    %7, i8 %a0, i32 7
  %9  = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

;
; v2f32
;

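; Float arguments arrive in XMM registers on x86-64 (and via movss loads
; under SSE on i686), so those lowerings move each scalar into an MMX
; register with movdq2q; the mmx-only i686 target instead loads the bits
; directly from the stack with movd.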
define void @build_v2f32_01(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_01:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_01:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm1
; X86-SSE-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT:    paddd %mm1, %mm1
; X86-SSE-NEXT:    movq %mm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_01:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float>    %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_0z(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_0z:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    pxor %mm0, %mm0
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_0z:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pxor %mm1, %mm1
; X86-SSE-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_0z:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float>    %1, float 0.0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_u1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_u1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_u1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_u1:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float undef, i32 0
  %2 = insertelement <2 x float>    %1, float   %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_z1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_z1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    pxor %mm1, %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_z1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pxor %mm1, %mm1
; X86-SSE-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT:    paddd %mm1, %mm1
; X86-SSE-NEXT:    movq %mm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_z1:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float 0.0, i32 0
  %2 = insertelement <2 x float>    %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_00(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_00:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float>    %1, float %a0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}
