; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
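; Check that calls to the MMX and SSSE3 intrinsics below are selected to the
; corresponding MMX instructions on both 32-bit (X86) and 64-bit (X64) targets.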

declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test1
; ALL: phaddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test88
; ALL: pcmpgtd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test87
; ALL: pcmpgtw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test86
; ALL: pcmpgtb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test85
; ALL: pcmpeqd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test84
; ALL: pcmpeqw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test83
; ALL: pcmpeqb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test82
; X86: punpckldq {{.*#+}} mm0 = mm0[0],mem[0]
; X64: punpckldq {{.*#+}} mm0 = mm0[0],mm1[0]
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test81
; X86: punpcklwd {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1]
; X64: punpcklwd {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test80
; X86: punpcklbw {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
; X64: punpcklbw {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test79
; X86: punpckhdq {{.*#+}} mm0 = mm0[1],mem[1]
; X64: punpckhdq {{.*#+}} mm0 = mm0[1],mm1[1]
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test78
; X86: punpckhwd {{.*#+}} mm0 = mm0[2],mem[2],mm0[3],mem[3]
; X64: punpckhwd {{.*#+}} mm0 = mm0[2],mm1[2],mm0[3],mm1[3]
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test77
; X86: punpckhbw {{.*#+}} mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
; X64: punpckhbw {{.*#+}} mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7]
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test76
; ALL: packuswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test75
; ALL: packssdw
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test74
; ALL: packsswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone

define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test73
; ALL: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone

define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test72
; ALL: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

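; Shifting by zero should not emit a psraw.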
define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test72_2
; ALL-NOT: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 0) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone

define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test71
; ALL: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone

define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test70
; ALL: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

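; Shifting by zero should not emit a psrld.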
define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test70_2
; ALL-NOT: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 0) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone

define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test69
; ALL: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone

define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test68
; ALL: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone

define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test67
; ALL: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone

define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test66
; ALL: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

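; Shifting by zero should not emit a psllw.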
define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test66_2
; ALL-NOT: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 0) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test65
; ALL: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test64
; ALL: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test63
; ALL: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test62
; ALL: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test61
; ALL: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test60
; ALL: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test59
; ALL: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test58
; ALL: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone

define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test56
; ALL: pxor
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone

define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test55
; ALL: por
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone

define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test54
; ALL: pandn
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone

define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test53
; ALL: pand
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test52
; ALL: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test51
; ALL: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test50
; ALL: pmulhw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test49
; ALL: pmaddwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test48
; ALL: psubusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test47
; ALL: psubusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test46
; ALL: psubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test45
; ALL: psubsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test44
; ALL: psubq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone

declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test43
; ALL: psubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test42
; ALL: psubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test41
; ALL: psubb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test40
; ALL: paddusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test39
; ALL: paddusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test38
; ALL: paddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test37
; ALL: paddsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test36
; ALL: paddq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test35
; ALL: paddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test34
; ALL: paddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test33
; ALL: paddb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test32
; ALL: psadbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test31
; ALL: pminsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test30
; ALL: pminub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test29
; ALL: pmaxsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test28
; ALL: pmaxub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test27
; ALL: pavgw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test26
; ALL: pavgb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind

define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
; ALL-LABEL: @test25
; ALL: movntq
entry:
  %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
  ret void
}

declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone

define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test24
; ALL: pmovmskb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
  ret i32 %1
}

declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind

define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
; ALL-LABEL: @test23
; ALL: maskmovq
entry:
  %0 = bitcast <1 x i64> %n to <8 x i8>
  %1 = bitcast <1 x i64> %d to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
  ret void
}

declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test22
; ALL: pmulhuw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone

define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test21
; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test21_2
; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
; ALL: movd
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <2 x i32>
  %5 = extractelement <2 x i32> %4, i32 0
  ret i32 %5
}

declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test20
; ALL: pmuludq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone

define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test19
; ALL: cvtpi2pd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
  ret <2 x double> %2
}

declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test18
; ALL: cvttpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test17
; ALL: cvtpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone

define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test16
; ALL: palignr
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone

define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test15
; ALL: pabsd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone

define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test14
; ALL: pabsw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone

define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
; ALL-LABEL: @test13
; ALL: pabsb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test12
; ALL: psignd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test11
; ALL: psignw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test10
; ALL: psignb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test9
; ALL: pshufb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test8
; ALL: pmulhrsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test7
; ALL: pmaddubsw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test6
; ALL: phsubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test5
; ALL: phsubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test4
; ALL: phsubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test3
; ALL: phaddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; ALL-LABEL: @test2
; ALL: phaddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
; ALL-LABEL: @test89
; ALL: cvtpi2ps
  %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b)
  ret <4 x float> %c
}

declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone

define void @test90() {
; ALL-LABEL: @test90
; ALL: emms
  call void @llvm.x86.mmx.emms()
  ret void
}

declare void @llvm.x86.mmx.emms()

define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind {
; X86-LABEL: test_mm_insert_pi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pinsrw $2, 16(%ebp), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_insert_pi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pinsrw $2, %esi, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a.coerce to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %0, i32 %d, i32 2)
  %2 = bitcast x86_mmx %1 to <1 x i64>
  ret <1 x i64> %2
}

declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg)

define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind {
; X86-LABEL: test_mm_extract_pi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    movq (%esp), %mm0
; X86-NEXT:    pextrw $2, %mm0, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_extract_pi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %mm0
; X64-NEXT:    pextrw $2, %mm0, %eax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64> %a.coerce to x86_mmx
  %1 = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx %0, i32 2)
  ret i32 %1
}

declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32 immarg)