1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s
3
; Stack-folding test for CVTPD2PI. The inline asm clobbers xmm1-xmm15 and
; defines one SSE value ("=x"); the expected assembly spills %xmm0 (16 bytes)
; before the nop and feeds the slot to cvtpd2pi as a folded memory operand.
; NOTE(review): ~{xmm1} is listed twice in the clobber string; the repeat looks
; redundant -- confirm it was not meant to name another register.
4define x86_mmx @stack_fold_cvtpd2pi(<2 x double> %a0) {
5; CHECK-LABEL: stack_fold_cvtpd2pi:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
8; CHECK-NEXT:    #APP
9; CHECK-NEXT:    nop
10; CHECK-NEXT:    #NO_APP
11; CHECK-NEXT:    cvtpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
12; CHECK-NEXT:    movq2dq %mm0, %xmm0
13; CHECK-NEXT:    retq
14  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
15  %2 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone
16  ret x86_mmx %2
17}
18declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
19
; Stack-folding test for CVTPI2PD. The inline asm clobbers mm1-mm7 and defines
; one MMX value ("=y"); the expected assembly spills %mm0 (8 bytes) before the
; nop and feeds the slot to cvtpi2pd as a folded memory operand.
; NOTE(review): ~{mm1} is listed twice in the clobber string; the repeat looks
; redundant -- confirm it was not meant to name another register.
20define <2 x double> @stack_fold_cvtpi2pd(x86_mmx %a0) {
21; CHECK-LABEL: stack_fold_cvtpi2pd:
22; CHECK:       # %bb.0:
23; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
24; CHECK-NEXT:    #APP
25; CHECK-NEXT:    nop
26; CHECK-NEXT:    #NO_APP
27; CHECK-NEXT:    cvtpi2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
28; CHECK-NEXT:    retq
29  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
30  %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) nounwind readnone
31  ret <2 x double> %2
32}
33declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
34
; Stack-folding test for CVTPI2PS. The inline asm clobbers mm1-mm7 and defines
; one MMX value ("=y"); the expected assembly spills %mm0 (8 bytes) before the
; nop and feeds the slot to cvtpi2ps as a folded memory operand, with %xmm0 as
; the destination.
; NOTE(review): ~{mm1} is listed twice in the clobber string; the repeat looks
; redundant -- confirm it was not meant to name another register.
35define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, x86_mmx %a1) {
36; CHECK-LABEL: stack_fold_cvtpi2ps:
37; CHECK:       # %bb.0:
38; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
39; CHECK-NEXT:    #APP
40; CHECK-NEXT:    nop
41; CHECK-NEXT:    #NO_APP
42; CHECK-NEXT:    cvtpi2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
43; CHECK-NEXT:    retq
44  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
45  %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %a1) nounwind readnone
46  ret <4 x float> %2
47}
48declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
49
; Stack-folding test for CVTPS2PI. The inline asm clobbers xmm1-xmm15 and
; defines one SSE value ("=x"); the expected assembly spills %xmm0 (16 bytes)
; before the nop and feeds the slot to cvtps2pi as a folded memory operand.
; NOTE(review): ~{xmm1} is listed twice in the clobber string; the repeat looks
; redundant -- confirm it was not meant to name another register.
50define x86_mmx @stack_fold_cvtps2pi(<4 x float> %a0) {
51; CHECK-LABEL: stack_fold_cvtps2pi:
52; CHECK:       # %bb.0:
53; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
54; CHECK-NEXT:    #APP
55; CHECK-NEXT:    nop
56; CHECK-NEXT:    #NO_APP
57; CHECK-NEXT:    cvtps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
58; CHECK-NEXT:    movq2dq %mm0, %xmm0
59; CHECK-NEXT:    retq
60  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
61  %2 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone
62  ret x86_mmx %2
63}
64declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone
65
; Stack-folding test for CVTTPD2PI (truncating variant). The inline asm
; clobbers xmm1-xmm15 and defines one SSE value ("=x"); the expected assembly
; spills %xmm0 (16 bytes) before the nop and feeds the slot to cvttpd2pi as a
; folded memory operand.
; NOTE(review): ~{xmm1} is listed twice in the clobber string; the repeat looks
; redundant -- confirm it was not meant to name another register.
66define x86_mmx @stack_fold_cvttpd2pi(<2 x double> %a0) {
67; CHECK-LABEL: stack_fold_cvttpd2pi:
68; CHECK:       # %bb.0:
69; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
70; CHECK-NEXT:    #APP
71; CHECK-NEXT:    nop
72; CHECK-NEXT:    #NO_APP
73; CHECK-NEXT:    cvttpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
74; CHECK-NEXT:    movq2dq %mm0, %xmm0
75; CHECK-NEXT:    retq
76  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
77  %2 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone
78  ret x86_mmx %2
79}
80declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
81
; Stack-folding test for CVTTPS2PI (truncating variant). The inline asm
; clobbers xmm1-xmm15 and defines one SSE value ("=x"); the expected assembly
; spills %xmm0 (16 bytes) before the nop and feeds the slot to cvttps2pi as a
; folded memory operand.
; NOTE(review): ~{xmm1} is listed twice in the clobber string; the repeat looks
; redundant -- confirm it was not meant to name another register.
82define x86_mmx @stack_fold_cvttps2pi(<4 x float> %a0) {
83; CHECK-LABEL: stack_fold_cvttps2pi:
84; CHECK:       # %bb.0:
85; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
86; CHECK-NEXT:    #APP
87; CHECK-NEXT:    nop
88; CHECK-NEXT:    #NO_APP
89; CHECK-NEXT:    cvttps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
90; CHECK-NEXT:    movq2dq %mm0, %xmm0
91; CHECK-NEXT:    retq
92  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
93  %2 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone
94  ret x86_mmx %2
95}
96declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
97
98; TODO stack_fold_movd_load
99
100; padd forces execution on mmx
; Stack-folding test for the MMX MOVD store form. The value is produced by
; paddb in an MMX register; the inline asm then clobbers rax, rbx, rcx, rdx,
; rsi, rdi, rbp and r8-r15, so the extracted low i32 must sit on the stack
; across the nop. The expected assembly writes it straight from %mm0 with a
; 4-byte movd (a folded spill, matching the 8-byte folded spill expected in
; stack_fold_movq_store) and reloads it into %eax afterwards.
101define i32 @stack_fold_movd_store(x86_mmx %a0) nounwind {
102; CHECK-LABEL: stack_fold_movd_store:
103; CHECK:       # %bb.0:
104; CHECK-NEXT:    pushq %rbp
105; CHECK-NEXT:    pushq %r15
106; CHECK-NEXT:    pushq %r14
107; CHECK-NEXT:    pushq %r13
108; CHECK-NEXT:    pushq %r12
109; CHECK-NEXT:    pushq %rbx
110; CHECK-NEXT:    paddb %mm0, %mm0
111; CHECK-NEXT:    movd %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
112; CHECK-NEXT:    #APP
113; CHECK-NEXT:    nop
114; CHECK-NEXT:    #NO_APP
115; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
116; CHECK-NEXT:    popq %rbx
117; CHECK-NEXT:    popq %r12
118; CHECK-NEXT:    popq %r13
119; CHECK-NEXT:    popq %r14
120; CHECK-NEXT:    popq %r15
121; CHECK-NEXT:    popq %rbp
122; CHECK-NEXT:    retq
123  %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a0)
124  %2 = bitcast x86_mmx %1 to <2 x i32>
125  %3 = extractelement <2 x i32> %2, i32 0
126  %4 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
127  ret i32 %3
128}
129
130; TODO stack_fold_movq_load
131
132; padd forces execution on mmx
; Stack-folding test for the MMX MOVQ store form. The value is produced by
; paddb in an MMX register; the inline asm then clobbers rax, rbx, rcx, rdx,
; rsi, rdi, rbp and r8-r15, so the i64 result must sit on the stack across the
; nop. The expected assembly writes it straight from %mm0 with an 8-byte movq
; (a folded spill) and reloads it into %rax afterwards.
133define i64 @stack_fold_movq_store(x86_mmx %a0) nounwind {
134; CHECK-LABEL: stack_fold_movq_store:
135; CHECK:       # %bb.0:
136; CHECK-NEXT:    pushq %rbp
137; CHECK-NEXT:    pushq %r15
138; CHECK-NEXT:    pushq %r14
139; CHECK-NEXT:    pushq %r13
140; CHECK-NEXT:    pushq %r12
141; CHECK-NEXT:    pushq %rbx
142; CHECK-NEXT:    paddb %mm0, %mm0
143; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
144; CHECK-NEXT:    #APP
145; CHECK-NEXT:    nop
146; CHECK-NEXT:    #NO_APP
147; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
148; CHECK-NEXT:    popq %rbx
149; CHECK-NEXT:    popq %r12
150; CHECK-NEXT:    popq %r13
151; CHECK-NEXT:    popq %r14
152; CHECK-NEXT:    popq %r15
153; CHECK-NEXT:    popq %rbp
154; CHECK-NEXT:    retq
155  %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a0)
156  %2 = bitcast x86_mmx %1 to i64
157  %3 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
158  ret i64 %2
159}
160
; Stack-folding test for the MMX form of PABSB (SSSE3). The inline asm clobbers
; mm1-mm7 and defines one MMX value ("=y"); the expected assembly spills %mm0
; before the nop and uses the slot as the folded memory operand of pabsb.
161define x86_mmx @stack_fold_pabsb(x86_mmx %a0) {
162; CHECK-LABEL: stack_fold_pabsb:
163; CHECK:       # %bb.0:
164; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
165; CHECK-NEXT:    #APP
166; CHECK-NEXT:    nop
167; CHECK-NEXT:    #NO_APP
168; CHECK-NEXT:    pabsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
169; CHECK-NEXT:    movq2dq %mm0, %xmm0
170; CHECK-NEXT:    retq
171  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
172  %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %a0) nounwind readnone
173  ret x86_mmx %2
174}
175declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
176
; Stack-folding test for the MMX form of PABSD (SSSE3). The inline asm clobbers
; mm1-mm7 and defines one MMX value ("=y"); the expected assembly spills %mm0
; before the nop and uses the slot as the folded memory operand of pabsd.
177define x86_mmx @stack_fold_pabsd(x86_mmx %a0) {
178; CHECK-LABEL: stack_fold_pabsd:
179; CHECK:       # %bb.0:
180; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
181; CHECK-NEXT:    #APP
182; CHECK-NEXT:    nop
183; CHECK-NEXT:    #NO_APP
184; CHECK-NEXT:    pabsd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
185; CHECK-NEXT:    movq2dq %mm0, %xmm0
186; CHECK-NEXT:    retq
187  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
188  %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %a0) nounwind readnone
189  ret x86_mmx %2
190}
191declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
192
; Stack-folding test for the MMX form of PABSW (SSSE3). The inline asm clobbers
; mm1-mm7 and defines one MMX value ("=y"); the expected assembly spills %mm0
; before the nop and uses the slot as the folded memory operand of pabsw.
193define x86_mmx @stack_fold_pabsw(x86_mmx %a0) {
194; CHECK-LABEL: stack_fold_pabsw:
195; CHECK:       # %bb.0:
196; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
197; CHECK-NEXT:    #APP
198; CHECK-NEXT:    nop
199; CHECK-NEXT:    #NO_APP
200; CHECK-NEXT:    pabsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
201; CHECK-NEXT:    movq2dq %mm0, %xmm0
202; CHECK-NEXT:    retq
203  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
204  %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %a0) nounwind readnone
205  ret x86_mmx %2
206}
207declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
208
; Stack-folding test for MMX PACKSSDW. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of packssdw into %mm0.
209define x86_mmx @stack_fold_packssdw(x86_mmx %a, x86_mmx %b) {
210; CHECK-LABEL: stack_fold_packssdw:
211; CHECK:       # %bb.0:
212; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
213; CHECK-NEXT:    #APP
214; CHECK-NEXT:    nop
215; CHECK-NEXT:    #NO_APP
216; CHECK-NEXT:    packssdw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
217; CHECK-NEXT:    movq2dq %mm0, %xmm0
218; CHECK-NEXT:    retq
219  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
220  %2 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a, x86_mmx %b) nounwind readnone
221  ret x86_mmx %2
222}
223declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
224
; Stack-folding test for MMX PACKSSWB. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of packsswb into %mm0.
225define x86_mmx @stack_fold_packsswb(x86_mmx %a, x86_mmx %b) {
226; CHECK-LABEL: stack_fold_packsswb:
227; CHECK:       # %bb.0:
228; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
229; CHECK-NEXT:    #APP
230; CHECK-NEXT:    nop
231; CHECK-NEXT:    #NO_APP
232; CHECK-NEXT:    packsswb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
233; CHECK-NEXT:    movq2dq %mm0, %xmm0
234; CHECK-NEXT:    retq
235  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
236  %2 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a, x86_mmx %b) nounwind readnone
237  ret x86_mmx %2
238}
239declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
240
; Stack-folding test for MMX PACKUSWB. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of packuswb into %mm0.
241define x86_mmx @stack_fold_packuswb(x86_mmx %a, x86_mmx %b) {
242; CHECK-LABEL: stack_fold_packuswb:
243; CHECK:       # %bb.0:
244; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
245; CHECK-NEXT:    #APP
246; CHECK-NEXT:    nop
247; CHECK-NEXT:    #NO_APP
248; CHECK-NEXT:    packuswb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
249; CHECK-NEXT:    movq2dq %mm0, %xmm0
250; CHECK-NEXT:    retq
251  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
252  %2 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a, x86_mmx %b) nounwind readnone
253  ret x86_mmx %2
254}
255declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
256
; Stack-folding test for MMX PADDB. The inline asm clobbers mm2-mm7 and defines
; one MMX value ("=y"); the expected assembly spills %mm1 before the nop and
; uses the slot as the folded memory source of paddb into %mm0.
257define x86_mmx @stack_fold_paddb(x86_mmx %a, x86_mmx %b) {
258; CHECK-LABEL: stack_fold_paddb:
259; CHECK:       # %bb.0:
260; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
261; CHECK-NEXT:    #APP
262; CHECK-NEXT:    nop
263; CHECK-NEXT:    #NO_APP
264; CHECK-NEXT:    paddb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
265; CHECK-NEXT:    movq2dq %mm0, %xmm0
266; CHECK-NEXT:    retq
267  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
268  %2 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a, x86_mmx %b) nounwind readnone
269  ret x86_mmx %2
270}
271declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
272
; Stack-folding test for MMX PADDD. The inline asm clobbers mm2-mm7 and defines
; one MMX value ("=y"); the expected assembly spills %mm1 before the nop and
; uses the slot as the folded memory source of paddd into %mm0.
273define x86_mmx @stack_fold_paddd(x86_mmx %a, x86_mmx %b) {
274; CHECK-LABEL: stack_fold_paddd:
275; CHECK:       # %bb.0:
276; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
277; CHECK-NEXT:    #APP
278; CHECK-NEXT:    nop
279; CHECK-NEXT:    #NO_APP
280; CHECK-NEXT:    paddd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
281; CHECK-NEXT:    movq2dq %mm0, %xmm0
282; CHECK-NEXT:    retq
283  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
284  %2 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
285  ret x86_mmx %2
286}
287declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
288
; Stack-folding test for MMX PADDQ. The inline asm clobbers mm2-mm7 and defines
; one MMX value ("=y"); the expected assembly spills %mm1 before the nop and
; uses the slot as the folded memory source of paddq into %mm0.
289define x86_mmx @stack_fold_paddq(x86_mmx %a, x86_mmx %b) {
290; CHECK-LABEL: stack_fold_paddq:
291; CHECK:       # %bb.0:
292; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
293; CHECK-NEXT:    #APP
294; CHECK-NEXT:    nop
295; CHECK-NEXT:    #NO_APP
296; CHECK-NEXT:    paddq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
297; CHECK-NEXT:    movq2dq %mm0, %xmm0
298; CHECK-NEXT:    retq
299  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
300  %2 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a, x86_mmx %b) nounwind readnone
301  ret x86_mmx %2
302}
303declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
304
; Stack-folding test for MMX PADDSB. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of paddsb into %mm0.
305define x86_mmx @stack_fold_paddsb(x86_mmx %a, x86_mmx %b) {
306; CHECK-LABEL: stack_fold_paddsb:
307; CHECK:       # %bb.0:
308; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
309; CHECK-NEXT:    #APP
310; CHECK-NEXT:    nop
311; CHECK-NEXT:    #NO_APP
312; CHECK-NEXT:    paddsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
313; CHECK-NEXT:    movq2dq %mm0, %xmm0
314; CHECK-NEXT:    retq
315  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
316  %2 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a, x86_mmx %b) nounwind readnone
317  ret x86_mmx %2
318}
319declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
320
; Stack-folding test for MMX PADDSW. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of paddsw into %mm0.
321define x86_mmx @stack_fold_paddsw(x86_mmx %a, x86_mmx %b) {
322; CHECK-LABEL: stack_fold_paddsw:
323; CHECK:       # %bb.0:
324; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
325; CHECK-NEXT:    #APP
326; CHECK-NEXT:    nop
327; CHECK-NEXT:    #NO_APP
328; CHECK-NEXT:    paddsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
329; CHECK-NEXT:    movq2dq %mm0, %xmm0
330; CHECK-NEXT:    retq
331  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
332  %2 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a, x86_mmx %b) nounwind readnone
333  ret x86_mmx %2
334}
335declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
336
; Stack-folding test for MMX PADDUSB. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of paddusb into %mm0.
337define x86_mmx @stack_fold_paddusb(x86_mmx %a, x86_mmx %b) {
338; CHECK-LABEL: stack_fold_paddusb:
339; CHECK:       # %bb.0:
340; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
341; CHECK-NEXT:    #APP
342; CHECK-NEXT:    nop
343; CHECK-NEXT:    #NO_APP
344; CHECK-NEXT:    paddusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
345; CHECK-NEXT:    movq2dq %mm0, %xmm0
346; CHECK-NEXT:    retq
347  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
348  %2 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
349  ret x86_mmx %2
350}
351declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
352
; Stack-folding test for MMX PADDUSW. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of paddusw into %mm0.
353define x86_mmx @stack_fold_paddusw(x86_mmx %a, x86_mmx %b) {
354; CHECK-LABEL: stack_fold_paddusw:
355; CHECK:       # %bb.0:
356; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
357; CHECK-NEXT:    #APP
358; CHECK-NEXT:    nop
359; CHECK-NEXT:    #NO_APP
360; CHECK-NEXT:    paddusw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
361; CHECK-NEXT:    movq2dq %mm0, %xmm0
362; CHECK-NEXT:    retq
363  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
364  %2 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
365  ret x86_mmx %2
366}
367declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
368
; Stack-folding test for MMX PADDW. The inline asm clobbers mm2-mm7 and defines
; one MMX value ("=y"); the expected assembly spills %mm1 before the nop and
; uses the slot as the folded memory source of paddw into %mm0.
369define x86_mmx @stack_fold_paddw(x86_mmx %a, x86_mmx %b) {
370; CHECK-LABEL: stack_fold_paddw:
371; CHECK:       # %bb.0:
372; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
373; CHECK-NEXT:    #APP
374; CHECK-NEXT:    nop
375; CHECK-NEXT:    #NO_APP
376; CHECK-NEXT:    paddw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
377; CHECK-NEXT:    movq2dq %mm0, %xmm0
378; CHECK-NEXT:    retq
379  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
380  %2 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
381  ret x86_mmx %2
382}
383declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
384
; Stack-folding test for the MMX form of PALIGNR. The inline asm clobbers
; mm2-mm7 and defines one MMX value ("=y"); the expected assembly spills %mm1
; before the nop and uses the slot as the folded memory source of palignr into
; %mm0. The i8 1 argument shows up as the $1 immediate.
385define x86_mmx @stack_fold_palignr(x86_mmx %a, x86_mmx %b) {
386; CHECK-LABEL: stack_fold_palignr:
387; CHECK:       # %bb.0:
388; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
389; CHECK-NEXT:    #APP
390; CHECK-NEXT:    nop
391; CHECK-NEXT:    #NO_APP
392; CHECK-NEXT:    palignr $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
393; CHECK-NEXT:    movq2dq %mm0, %xmm0
394; CHECK-NEXT:    retq
395  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
396  %2 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a, x86_mmx %b, i8 1) nounwind readnone
397  ret x86_mmx %2
398}
399declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
400
; Stack-folding test for MMX PAND. The inline asm clobbers mm2-mm7 and defines
; one MMX value ("=y"); the expected assembly spills %mm1 before the nop and
; uses the slot as the folded memory source of pand into %mm0.
401define x86_mmx @stack_fold_pand(x86_mmx %a, x86_mmx %b) {
402; CHECK-LABEL: stack_fold_pand:
403; CHECK:       # %bb.0:
404; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
405; CHECK-NEXT:    #APP
406; CHECK-NEXT:    nop
407; CHECK-NEXT:    #NO_APP
408; CHECK-NEXT:    pand {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
409; CHECK-NEXT:    movq2dq %mm0, %xmm0
410; CHECK-NEXT:    retq
411  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
412  %2 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a, x86_mmx %b) nounwind readnone
413  ret x86_mmx %2
414}
415declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
416
; Stack-folding test for MMX PANDN. The inline asm clobbers mm2-mm7 and defines
; one MMX value ("=y"); the expected assembly spills %mm1 before the nop and
; uses the slot as the folded memory source of pandn into %mm0.
417define x86_mmx @stack_fold_pandn(x86_mmx %a, x86_mmx %b) {
418; CHECK-LABEL: stack_fold_pandn:
419; CHECK:       # %bb.0:
420; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
421; CHECK-NEXT:    #APP
422; CHECK-NEXT:    nop
423; CHECK-NEXT:    #NO_APP
424; CHECK-NEXT:    pandn {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
425; CHECK-NEXT:    movq2dq %mm0, %xmm0
426; CHECK-NEXT:    retq
427  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
428  %2 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a, x86_mmx %b) nounwind readnone
429  ret x86_mmx %2
430}
431declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
432
; Stack-folding test for MMX PAVGB. The inline asm clobbers mm2-mm7 and defines
; one MMX value ("=y"); the expected assembly spills %mm1 before the nop and
; uses the slot as the folded memory source of pavgb into %mm0.
433define x86_mmx @stack_fold_pavgb(x86_mmx %a, x86_mmx %b) {
434; CHECK-LABEL: stack_fold_pavgb:
435; CHECK:       # %bb.0:
436; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
437; CHECK-NEXT:    #APP
438; CHECK-NEXT:    nop
439; CHECK-NEXT:    #NO_APP
440; CHECK-NEXT:    pavgb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
441; CHECK-NEXT:    movq2dq %mm0, %xmm0
442; CHECK-NEXT:    retq
443  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
444  %2 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a, x86_mmx %b) nounwind readnone
445  ret x86_mmx %2
446}
447declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
448
; Stack-folding test for MMX PAVGW. The inline asm clobbers mm2-mm7 and defines
; one MMX value ("=y"); the expected assembly spills %mm1 before the nop and
; uses the slot as the folded memory source of pavgw into %mm0.
449define x86_mmx @stack_fold_pavgw(x86_mmx %a, x86_mmx %b) {
450; CHECK-LABEL: stack_fold_pavgw:
451; CHECK:       # %bb.0:
452; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
453; CHECK-NEXT:    #APP
454; CHECK-NEXT:    nop
455; CHECK-NEXT:    #NO_APP
456; CHECK-NEXT:    pavgw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
457; CHECK-NEXT:    movq2dq %mm0, %xmm0
458; CHECK-NEXT:    retq
459  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
460  %2 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a, x86_mmx %b) nounwind readnone
461  ret x86_mmx %2
462}
463declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
464
; Stack-folding test for MMX PCMPEQB. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of pcmpeqb into %mm0.
465define x86_mmx @stack_fold_pcmpeqb(x86_mmx %a, x86_mmx %b) {
466; CHECK-LABEL: stack_fold_pcmpeqb:
467; CHECK:       # %bb.0:
468; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
469; CHECK-NEXT:    #APP
470; CHECK-NEXT:    nop
471; CHECK-NEXT:    #NO_APP
472; CHECK-NEXT:    pcmpeqb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
473; CHECK-NEXT:    movq2dq %mm0, %xmm0
474; CHECK-NEXT:    retq
475  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
476  %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a, x86_mmx %b) nounwind readnone
477  ret x86_mmx %2
478}
479declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
480
; Stack-folding test for MMX PCMPEQD. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of pcmpeqd into %mm0.
481define x86_mmx @stack_fold_pcmpeqd(x86_mmx %a, x86_mmx %b) {
482; CHECK-LABEL: stack_fold_pcmpeqd:
483; CHECK:       # %bb.0:
484; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
485; CHECK-NEXT:    #APP
486; CHECK-NEXT:    nop
487; CHECK-NEXT:    #NO_APP
488; CHECK-NEXT:    pcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
489; CHECK-NEXT:    movq2dq %mm0, %xmm0
490; CHECK-NEXT:    retq
491  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
492  %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a, x86_mmx %b) nounwind readnone
493  ret x86_mmx %2
494}
495declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
496
; Stack-folding test for MMX PCMPEQW. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of pcmpeqw into %mm0.
497define x86_mmx @stack_fold_pcmpeqw(x86_mmx %a, x86_mmx %b) {
498; CHECK-LABEL: stack_fold_pcmpeqw:
499; CHECK:       # %bb.0:
500; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
501; CHECK-NEXT:    #APP
502; CHECK-NEXT:    nop
503; CHECK-NEXT:    #NO_APP
504; CHECK-NEXT:    pcmpeqw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
505; CHECK-NEXT:    movq2dq %mm0, %xmm0
506; CHECK-NEXT:    retq
507  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
508  %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a, x86_mmx %b) nounwind readnone
509  ret x86_mmx %2
510}
511declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
512
; Stack-folding test for MMX PCMPGTB. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of pcmpgtb into %mm0.
513define x86_mmx @stack_fold_pcmpgtb(x86_mmx %a, x86_mmx %b) {
514; CHECK-LABEL: stack_fold_pcmpgtb:
515; CHECK:       # %bb.0:
516; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
517; CHECK-NEXT:    #APP
518; CHECK-NEXT:    nop
519; CHECK-NEXT:    #NO_APP
520; CHECK-NEXT:    pcmpgtb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
521; CHECK-NEXT:    movq2dq %mm0, %xmm0
522; CHECK-NEXT:    retq
523  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
524  %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a, x86_mmx %b) nounwind readnone
525  ret x86_mmx %2
526}
527declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
528
; Stack-folding test for MMX PCMPGTD. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of pcmpgtd into %mm0.
529define x86_mmx @stack_fold_pcmpgtd(x86_mmx %a, x86_mmx %b) {
530; CHECK-LABEL: stack_fold_pcmpgtd:
531; CHECK:       # %bb.0:
532; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
533; CHECK-NEXT:    #APP
534; CHECK-NEXT:    nop
535; CHECK-NEXT:    #NO_APP
536; CHECK-NEXT:    pcmpgtd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
537; CHECK-NEXT:    movq2dq %mm0, %xmm0
538; CHECK-NEXT:    retq
539  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
540  %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a, x86_mmx %b) nounwind readnone
541  ret x86_mmx %2
542}
543declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
544
; Stack-folding test for MMX PCMPGTW. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of pcmpgtw into %mm0.
545define x86_mmx @stack_fold_pcmpgtw(x86_mmx %a, x86_mmx %b) {
546; CHECK-LABEL: stack_fold_pcmpgtw:
547; CHECK:       # %bb.0:
548; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
549; CHECK-NEXT:    #APP
550; CHECK-NEXT:    nop
551; CHECK-NEXT:    #NO_APP
552; CHECK-NEXT:    pcmpgtw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
553; CHECK-NEXT:    movq2dq %mm0, %xmm0
554; CHECK-NEXT:    retq
555  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
556  %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a, x86_mmx %b) nounwind readnone
557  ret x86_mmx %2
558}
559declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
560
; Stack-folding test for the MMX form of PHADDD (SSSE3). The inline asm
; clobbers mm2-mm7 and defines one MMX value ("=y"); the expected assembly
; spills %mm1 before the nop and uses the slot as the folded memory source of
; phaddd into %mm0.
561define x86_mmx @stack_fold_phaddd(x86_mmx %a, x86_mmx %b) {
562; CHECK-LABEL: stack_fold_phaddd:
563; CHECK:       # %bb.0:
564; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
565; CHECK-NEXT:    #APP
566; CHECK-NEXT:    nop
567; CHECK-NEXT:    #NO_APP
568; CHECK-NEXT:    phaddd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
569; CHECK-NEXT:    movq2dq %mm0, %xmm0
570; CHECK-NEXT:    retq
571  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
572  %2 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
573  ret x86_mmx %2
574}
575declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
576
; Stack-folding test for the MMX form of PHADDSW (SSSE3). The inline asm
; clobbers mm2-mm7 and defines one MMX value ("=y"); the expected assembly
; spills %mm1 before the nop and uses the slot as the folded memory source of
; phaddsw into %mm0.
577define x86_mmx @stack_fold_phaddsw(x86_mmx %a, x86_mmx %b) {
578; CHECK-LABEL: stack_fold_phaddsw:
579; CHECK:       # %bb.0:
580; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
581; CHECK-NEXT:    #APP
582; CHECK-NEXT:    nop
583; CHECK-NEXT:    #NO_APP
584; CHECK-NEXT:    phaddsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
585; CHECK-NEXT:    movq2dq %mm0, %xmm0
586; CHECK-NEXT:    retq
587  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
588  %2 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
589  ret x86_mmx %2
590}
591declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
592
; Stack-folding test for the MMX form of PHADDW (SSSE3). The inline asm
; clobbers mm2-mm7 and defines one MMX value ("=y"); the expected assembly
; spills %mm1 before the nop and uses the slot as the folded memory source of
; phaddw into %mm0.
593define x86_mmx @stack_fold_phaddw(x86_mmx %a, x86_mmx %b) {
594; CHECK-LABEL: stack_fold_phaddw:
595; CHECK:       # %bb.0:
596; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
597; CHECK-NEXT:    #APP
598; CHECK-NEXT:    nop
599; CHECK-NEXT:    #NO_APP
600; CHECK-NEXT:    phaddw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
601; CHECK-NEXT:    movq2dq %mm0, %xmm0
602; CHECK-NEXT:    retq
603  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
604  %2 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
605  ret x86_mmx %2
606}
607declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
608
; Stack-folding test for the MMX form of PHSUBD (SSSE3). The inline asm
; clobbers mm2-mm7 and defines one MMX value ("=y"); the expected assembly
; spills %mm1 before the nop and uses the slot as the folded memory source of
; phsubd into %mm0.
609define x86_mmx @stack_fold_phsubd(x86_mmx %a, x86_mmx %b) {
610; CHECK-LABEL: stack_fold_phsubd:
611; CHECK:       # %bb.0:
612; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
613; CHECK-NEXT:    #APP
614; CHECK-NEXT:    nop
615; CHECK-NEXT:    #NO_APP
616; CHECK-NEXT:    phsubd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
617; CHECK-NEXT:    movq2dq %mm0, %xmm0
618; CHECK-NEXT:    retq
619  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
620  %2 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
621  ret x86_mmx %2
622}
623declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
624
; Stack-folding test for the MMX form of PHSUBSW (SSSE3). The inline asm
; clobbers mm2-mm7 and defines one MMX value ("=y"); the expected assembly
; spills %mm1 before the nop and uses the slot as the folded memory source of
; phsubsw into %mm0.
625define x86_mmx @stack_fold_phsubsw(x86_mmx %a, x86_mmx %b) {
626; CHECK-LABEL: stack_fold_phsubsw:
627; CHECK:       # %bb.0:
628; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
629; CHECK-NEXT:    #APP
630; CHECK-NEXT:    nop
631; CHECK-NEXT:    #NO_APP
632; CHECK-NEXT:    phsubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
633; CHECK-NEXT:    movq2dq %mm0, %xmm0
634; CHECK-NEXT:    retq
635  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
636  %2 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
637  ret x86_mmx %2
638}
639declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
640
; Stack-folding test for the MMX form of PHSUBW (SSSE3). The inline asm
; clobbers mm2-mm7 and defines one MMX value ("=y"); the expected assembly
; spills %mm1 before the nop and uses the slot as the folded memory source of
; phsubw into %mm0.
641define x86_mmx @stack_fold_phsubw(x86_mmx %a, x86_mmx %b) {
642; CHECK-LABEL: stack_fold_phsubw:
643; CHECK:       # %bb.0:
644; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
645; CHECK-NEXT:    #APP
646; CHECK-NEXT:    nop
647; CHECK-NEXT:    #NO_APP
648; CHECK-NEXT:    phsubw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
649; CHECK-NEXT:    movq2dq %mm0, %xmm0
650; CHECK-NEXT:    retq
651  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
652  %2 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
653  ret x86_mmx %2
654}
655declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
656
657; TODO stack_fold_pinsrw
658
; Stack-folding test for the MMX form of PMADDUBSW (SSSE3). The inline asm
; clobbers mm2-mm7 and defines one MMX value ("=y"); the expected assembly
; spills %mm1 before the nop and uses the slot as the folded memory source of
; pmaddubsw into %mm0.
659define x86_mmx @stack_fold_pmaddubsw(x86_mmx %a, x86_mmx %b) {
660; CHECK-LABEL: stack_fold_pmaddubsw:
661; CHECK:       # %bb.0:
662; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
663; CHECK-NEXT:    #APP
664; CHECK-NEXT:    nop
665; CHECK-NEXT:    #NO_APP
666; CHECK-NEXT:    pmaddubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
667; CHECK-NEXT:    movq2dq %mm0, %xmm0
668; CHECK-NEXT:    retq
669  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
670  %2 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
671  ret x86_mmx %2
672}
673declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
674
; Stack-folding test for MMX PMADDWD. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of pmaddwd into %mm0.
675define x86_mmx @stack_fold_pmaddwd(x86_mmx %a, x86_mmx %b) {
676; CHECK-LABEL: stack_fold_pmaddwd:
677; CHECK:       # %bb.0:
678; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
679; CHECK-NEXT:    #APP
680; CHECK-NEXT:    nop
681; CHECK-NEXT:    #NO_APP
682; CHECK-NEXT:    pmaddwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
683; CHECK-NEXT:    movq2dq %mm0, %xmm0
684; CHECK-NEXT:    retq
685  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
686  %2 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a, x86_mmx %b) nounwind readnone
687  ret x86_mmx %2
688}
689declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
690
; Stack-folding test for MMX PMAXSW. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of pmaxsw into %mm0.
691define x86_mmx @stack_fold_pmaxsw(x86_mmx %a, x86_mmx %b) {
692; CHECK-LABEL: stack_fold_pmaxsw:
693; CHECK:       # %bb.0:
694; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
695; CHECK-NEXT:    #APP
696; CHECK-NEXT:    nop
697; CHECK-NEXT:    #NO_APP
698; CHECK-NEXT:    pmaxsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
699; CHECK-NEXT:    movq2dq %mm0, %xmm0
700; CHECK-NEXT:    retq
701  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
702  %2 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
703  ret x86_mmx %2
704}
705declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
706
; Stack-folding test for MMX PMAXUB. The inline asm clobbers mm2-mm7 and
; defines one MMX value ("=y"); the expected assembly spills %mm1 before the
; nop and uses the slot as the folded memory source of pmaxub into %mm0.
707define x86_mmx @stack_fold_pmaxub(x86_mmx %a, x86_mmx %b) {
708; CHECK-LABEL: stack_fold_pmaxub:
709; CHECK:       # %bb.0:
710; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
711; CHECK-NEXT:    #APP
712; CHECK-NEXT:    nop
713; CHECK-NEXT:    #NO_APP
714; CHECK-NEXT:    pmaxub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
715; CHECK-NEXT:    movq2dq %mm0, %xmm0
716; CHECK-NEXT:    retq
717  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
718  %2 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
719  ret x86_mmx %2
720}
721declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
722
; Stack-folding test for pminsw. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has pminsw read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
723define x86_mmx @stack_fold_pminsw(x86_mmx %a, x86_mmx %b) {
724; CHECK-LABEL: stack_fold_pminsw:
725; CHECK:       # %bb.0:
726; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
727; CHECK-NEXT:    #APP
728; CHECK-NEXT:    nop
729; CHECK-NEXT:    #NO_APP
730; CHECK-NEXT:    pminsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
731; CHECK-NEXT:    movq2dq %mm0, %xmm0
732; CHECK-NEXT:    retq
733  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
734  %2 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a, x86_mmx %b) nounwind readnone
735  ret x86_mmx %2
736}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
737declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
738
; Stack-folding test for pminub. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has pminub read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
739define x86_mmx @stack_fold_pminub(x86_mmx %a, x86_mmx %b) {
740; CHECK-LABEL: stack_fold_pminub:
741; CHECK:       # %bb.0:
742; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
743; CHECK-NEXT:    #APP
744; CHECK-NEXT:    nop
745; CHECK-NEXT:    #NO_APP
746; CHECK-NEXT:    pminub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
747; CHECK-NEXT:    movq2dq %mm0, %xmm0
748; CHECK-NEXT:    retq
749  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
750  %2 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
751  ret x86_mmx %2
752}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
753declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
754
; Stack-folding test for pmulhrsw (reached through an ssse3-namespaced
; intrinsic). The side-effecting "nop" inline asm returns an x86_mmx value
; through a "=y" output and clobbers mm2-mm7. The expected assembly spills %mm1
; before the asm and then has pmulhrsw read that stack slot directly as its
; memory operand (the "8-byte Folded Reload").
755define x86_mmx @stack_fold_pmulhrsw(x86_mmx %a, x86_mmx %b) {
756; CHECK-LABEL: stack_fold_pmulhrsw:
757; CHECK:       # %bb.0:
758; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
759; CHECK-NEXT:    #APP
760; CHECK-NEXT:    nop
761; CHECK-NEXT:    #NO_APP
762; CHECK-NEXT:    pmulhrsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
763; CHECK-NEXT:    movq2dq %mm0, %xmm0
764; CHECK-NEXT:    retq
765  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
766  %2 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
767  ret x86_mmx %2
768}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
769declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
770
; Stack-folding test for pmulhuw. The side-effecting "nop" inline asm returns
; an x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has pmulhuw read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
771define x86_mmx @stack_fold_pmulhuw(x86_mmx %a, x86_mmx %b) {
772; CHECK-LABEL: stack_fold_pmulhuw:
773; CHECK:       # %bb.0:
774; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
775; CHECK-NEXT:    #APP
776; CHECK-NEXT:    nop
777; CHECK-NEXT:    #NO_APP
778; CHECK-NEXT:    pmulhuw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
779; CHECK-NEXT:    movq2dq %mm0, %xmm0
780; CHECK-NEXT:    retq
781  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
782  %2 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a, x86_mmx %b) nounwind readnone
783  ret x86_mmx %2
784}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
785declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
786
; Stack-folding test for pmulhw. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has pmulhw read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
787define x86_mmx @stack_fold_pmulhw(x86_mmx %a, x86_mmx %b) {
788; CHECK-LABEL: stack_fold_pmulhw:
789; CHECK:       # %bb.0:
790; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
791; CHECK-NEXT:    #APP
792; CHECK-NEXT:    nop
793; CHECK-NEXT:    #NO_APP
794; CHECK-NEXT:    pmulhw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
795; CHECK-NEXT:    movq2dq %mm0, %xmm0
796; CHECK-NEXT:    retq
797  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
798  %2 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a, x86_mmx %b) nounwind readnone
799  ret x86_mmx %2
800}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
801declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
802
; Stack-folding test for pmullw. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has pmullw read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
803define x86_mmx @stack_fold_pmullw(x86_mmx %a, x86_mmx %b) {
804; CHECK-LABEL: stack_fold_pmullw:
805; CHECK:       # %bb.0:
806; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
807; CHECK-NEXT:    #APP
808; CHECK-NEXT:    nop
809; CHECK-NEXT:    #NO_APP
810; CHECK-NEXT:    pmullw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
811; CHECK-NEXT:    movq2dq %mm0, %xmm0
812; CHECK-NEXT:    retq
813  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
814  %2 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a, x86_mmx %b) nounwind readnone
815  ret x86_mmx %2
816}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
817declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
818
; Stack-folding test for pmuludq. The side-effecting "nop" inline asm returns
; an x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has pmuludq read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
819define x86_mmx @stack_fold_pmuludq(x86_mmx %a, x86_mmx %b) {
820; CHECK-LABEL: stack_fold_pmuludq:
821; CHECK:       # %bb.0:
822; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
823; CHECK-NEXT:    #APP
824; CHECK-NEXT:    nop
825; CHECK-NEXT:    #NO_APP
826; CHECK-NEXT:    pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
827; CHECK-NEXT:    movq2dq %mm0, %xmm0
828; CHECK-NEXT:    retq
829  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
830  %2 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a, x86_mmx %b) nounwind readnone
831  ret x86_mmx %2
832}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
833declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
834
; Stack-folding test for por. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has por read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
835define x86_mmx @stack_fold_por(x86_mmx %a, x86_mmx %b) {
836; CHECK-LABEL: stack_fold_por:
837; CHECK:       # %bb.0:
838; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
839; CHECK-NEXT:    #APP
840; CHECK-NEXT:    nop
841; CHECK-NEXT:    #NO_APP
842; CHECK-NEXT:    por {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
843; CHECK-NEXT:    movq2dq %mm0, %xmm0
844; CHECK-NEXT:    retq
845  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
846  %2 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a, x86_mmx %b) nounwind readnone
847  ret x86_mmx %2
848}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
849declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
850
; Stack-folding test for psadbw. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has psadbw read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
851define x86_mmx @stack_fold_psadbw(x86_mmx %a, x86_mmx %b) {
852; CHECK-LABEL: stack_fold_psadbw:
853; CHECK:       # %bb.0:
854; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
855; CHECK-NEXT:    #APP
856; CHECK-NEXT:    nop
857; CHECK-NEXT:    #NO_APP
858; CHECK-NEXT:    psadbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
859; CHECK-NEXT:    movq2dq %mm0, %xmm0
860; CHECK-NEXT:    retq
861  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
862  %2 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind readnone
863  ret x86_mmx %2
864}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
865declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
866
; Stack-folding test for pshufb (reached through an ssse3-namespaced
; intrinsic). Here the side-effecting "nop" inline asm clobbers mm1-mm7 in
; addition to producing a "=y" x86_mmx result, and the expected assembly spills
; both %mm1 and %mm0 before the asm, reloads %mm0 with a plain movq afterwards,
; and still has pshufb take its source operand from the stack (the "8-byte
; Folded Reload").
; NOTE(review): the other two-operand tests in this file clobber only mm2-mm7
; and spill a single register; confirm the extra mm1 clobber is intentional.
867define x86_mmx @stack_fold_pshufb(x86_mmx %a, x86_mmx %b) {
868; CHECK-LABEL: stack_fold_pshufb:
869; CHECK:       # %bb.0:
870; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
871; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
872; CHECK-NEXT:    #APP
873; CHECK-NEXT:    nop
874; CHECK-NEXT:    #NO_APP
875; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Reload
876; CHECK-NEXT:    pshufb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
877; CHECK-NEXT:    movq2dq %mm0, %xmm0
878; CHECK-NEXT:    retq
879  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
880  %2 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a, x86_mmx %b) nounwind readnone
881  ret x86_mmx %2
882}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
883declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
884
; Stack-folding test for pshufw with immediate 1. The function takes a single
; x86_mmx argument; the side-effecting "nop" inline asm returns an x86_mmx
; value through a "=y" output and clobbers mm1-mm7. The expected assembly
; spills %mm0 before the asm, has pshufw read that stack slot directly as its
; memory operand (the "8-byte Folded Reload"), and pins the printed shuffle
; decode comment (mm0 = mem[1,0,0,0]).
885define x86_mmx @stack_fold_pshufw(x86_mmx %a) {
886; CHECK-LABEL: stack_fold_pshufw:
887; CHECK:       # %bb.0:
888; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
889; CHECK-NEXT:    #APP
890; CHECK-NEXT:    nop
891; CHECK-NEXT:    #NO_APP
892; CHECK-NEXT:    pshufw $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
893; CHECK-NEXT:    # mm0 = mem[1,0,0,0]
894; CHECK-NEXT:    movq2dq %mm0, %xmm0
895; CHECK-NEXT:    retq
896  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
897  %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %a, i8 1) nounwind readnone
898  ret x86_mmx %2
899}
; Intrinsic called by the test: (x86_mmx, i8) -> x86_mmx.
900declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
901
; Stack-folding test for psignb (reached through an ssse3-namespaced
; intrinsic). The side-effecting "nop" inline asm returns an x86_mmx value
; through a "=y" output and clobbers mm2-mm7. The expected assembly spills %mm1
; before the asm and then has psignb read that stack slot directly as its
; memory operand (the "8-byte Folded Reload").
902define x86_mmx @stack_fold_psignb(x86_mmx %a0, x86_mmx %a1) {
903; CHECK-LABEL: stack_fold_psignb:
904; CHECK:       # %bb.0:
905; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
906; CHECK-NEXT:    #APP
907; CHECK-NEXT:    nop
908; CHECK-NEXT:    #NO_APP
909; CHECK-NEXT:    psignb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
910; CHECK-NEXT:    movq2dq %mm0, %xmm0
911; CHECK-NEXT:    retq
912  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
913  %2 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) nounwind readnone
914  ret x86_mmx %2
915}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
916declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
917
; Stack-folding test for psignd (reached through an ssse3-namespaced
; intrinsic). The side-effecting "nop" inline asm returns an x86_mmx value
; through a "=y" output and clobbers mm2-mm7. The expected assembly spills %mm1
; before the asm and then has psignd read that stack slot directly as its
; memory operand (the "8-byte Folded Reload").
918define x86_mmx @stack_fold_psignd(x86_mmx %a0, x86_mmx %a1) {
919; CHECK-LABEL: stack_fold_psignd:
920; CHECK:       # %bb.0:
921; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
922; CHECK-NEXT:    #APP
923; CHECK-NEXT:    nop
924; CHECK-NEXT:    #NO_APP
925; CHECK-NEXT:    psignd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
926; CHECK-NEXT:    movq2dq %mm0, %xmm0
927; CHECK-NEXT:    retq
928  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
929  %2 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) nounwind readnone
930  ret x86_mmx %2
931}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
932declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
933
; Stack-folding test for psignw (reached through an ssse3-namespaced
; intrinsic). The side-effecting "nop" inline asm returns an x86_mmx value
; through a "=y" output and clobbers mm2-mm7. The expected assembly spills %mm1
; before the asm and then has psignw read that stack slot directly as its
; memory operand (the "8-byte Folded Reload").
934define x86_mmx @stack_fold_psignw(x86_mmx %a0, x86_mmx %a1) {
935; CHECK-LABEL: stack_fold_psignw:
936; CHECK:       # %bb.0:
937; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
938; CHECK-NEXT:    #APP
939; CHECK-NEXT:    nop
940; CHECK-NEXT:    #NO_APP
941; CHECK-NEXT:    psignw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
942; CHECK-NEXT:    movq2dq %mm0, %xmm0
943; CHECK-NEXT:    retq
944  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
945  %2 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) nounwind readnone
946  ret x86_mmx %2
947}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
948declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
949
; Stack-folding test for pslld (both operands passed as x86_mmx). The
; side-effecting "nop" inline asm returns an x86_mmx value through a "=y"
; output and clobbers mm2-mm7. The expected assembly spills %mm1 before the asm
; and then has pslld read that stack slot directly as its memory operand (the
; "8-byte Folded Reload").
950define x86_mmx @stack_fold_pslld(x86_mmx %a, x86_mmx %b) {
951; CHECK-LABEL: stack_fold_pslld:
952; CHECK:       # %bb.0:
953; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
954; CHECK-NEXT:    #APP
955; CHECK-NEXT:    nop
956; CHECK-NEXT:    #NO_APP
957; CHECK-NEXT:    pslld {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
958; CHECK-NEXT:    movq2dq %mm0, %xmm0
959; CHECK-NEXT:    retq
960  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
961  %2 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a, x86_mmx %b) nounwind readnone
962  ret x86_mmx %2
963}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
964declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
965
; Stack-folding test for psllq (both operands passed as x86_mmx). The
; side-effecting "nop" inline asm returns an x86_mmx value through a "=y"
; output and clobbers mm2-mm7. The expected assembly spills %mm1 before the asm
; and then has psllq read that stack slot directly as its memory operand (the
; "8-byte Folded Reload").
966define x86_mmx @stack_fold_psllq(x86_mmx %a, x86_mmx %b) {
967; CHECK-LABEL: stack_fold_psllq:
968; CHECK:       # %bb.0:
969; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
970; CHECK-NEXT:    #APP
971; CHECK-NEXT:    nop
972; CHECK-NEXT:    #NO_APP
973; CHECK-NEXT:    psllq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
974; CHECK-NEXT:    movq2dq %mm0, %xmm0
975; CHECK-NEXT:    retq
976  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
977  %2 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a, x86_mmx %b) nounwind readnone
978  ret x86_mmx %2
979}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
980declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
981
; Stack-folding test for psllw (both operands passed as x86_mmx). The
; side-effecting "nop" inline asm returns an x86_mmx value through a "=y"
; output and clobbers mm2-mm7. The expected assembly spills %mm1 before the asm
; and then has psllw read that stack slot directly as its memory operand (the
; "8-byte Folded Reload").
982define x86_mmx @stack_fold_psllw(x86_mmx %a, x86_mmx %b) {
983; CHECK-LABEL: stack_fold_psllw:
984; CHECK:       # %bb.0:
985; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
986; CHECK-NEXT:    #APP
987; CHECK-NEXT:    nop
988; CHECK-NEXT:    #NO_APP
989; CHECK-NEXT:    psllw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
990; CHECK-NEXT:    movq2dq %mm0, %xmm0
991; CHECK-NEXT:    retq
992  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
993  %2 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a, x86_mmx %b) nounwind readnone
994  ret x86_mmx %2
995}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
996declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
997
; Stack-folding test for psrad (both operands passed as x86_mmx). The
; side-effecting "nop" inline asm returns an x86_mmx value through a "=y"
; output and clobbers mm2-mm7. The expected assembly spills %mm1 before the asm
; and then has psrad read that stack slot directly as its memory operand (the
; "8-byte Folded Reload").
998define x86_mmx @stack_fold_psrad(x86_mmx %a, x86_mmx %b) {
999; CHECK-LABEL: stack_fold_psrad:
1000; CHECK:       # %bb.0:
1001; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1002; CHECK-NEXT:    #APP
1003; CHECK-NEXT:    nop
1004; CHECK-NEXT:    #NO_APP
1005; CHECK-NEXT:    psrad {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1006; CHECK-NEXT:    movq2dq %mm0, %xmm0
1007; CHECK-NEXT:    retq
1008  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1009  %2 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a, x86_mmx %b) nounwind readnone
1010  ret x86_mmx %2
1011}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1012declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
1013
; Stack-folding test for psraw (both operands passed as x86_mmx). The
; side-effecting "nop" inline asm returns an x86_mmx value through a "=y"
; output and clobbers mm2-mm7. The expected assembly spills %mm1 before the asm
; and then has psraw read that stack slot directly as its memory operand (the
; "8-byte Folded Reload").
1014define x86_mmx @stack_fold_psraw(x86_mmx %a, x86_mmx %b) {
1015; CHECK-LABEL: stack_fold_psraw:
1016; CHECK:       # %bb.0:
1017; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1018; CHECK-NEXT:    #APP
1019; CHECK-NEXT:    nop
1020; CHECK-NEXT:    #NO_APP
1021; CHECK-NEXT:    psraw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1022; CHECK-NEXT:    movq2dq %mm0, %xmm0
1023; CHECK-NEXT:    retq
1024  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1025  %2 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a, x86_mmx %b) nounwind readnone
1026  ret x86_mmx %2
1027}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1028declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
1029
; Stack-folding test for psrld (both operands passed as x86_mmx). The
; side-effecting "nop" inline asm returns an x86_mmx value through a "=y"
; output and clobbers mm2-mm7. The expected assembly spills %mm1 before the asm
; and then has psrld read that stack slot directly as its memory operand (the
; "8-byte Folded Reload").
1030define x86_mmx @stack_fold_psrld(x86_mmx %a, x86_mmx %b) {
1031; CHECK-LABEL: stack_fold_psrld:
1032; CHECK:       # %bb.0:
1033; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1034; CHECK-NEXT:    #APP
1035; CHECK-NEXT:    nop
1036; CHECK-NEXT:    #NO_APP
1037; CHECK-NEXT:    psrld {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1038; CHECK-NEXT:    movq2dq %mm0, %xmm0
1039; CHECK-NEXT:    retq
1040  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1041  %2 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a, x86_mmx %b) nounwind readnone
1042  ret x86_mmx %2
1043}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1044declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
1045
; Stack-folding test for psrlq (both operands passed as x86_mmx). The
; side-effecting "nop" inline asm returns an x86_mmx value through a "=y"
; output and clobbers mm2-mm7. The expected assembly spills %mm1 before the asm
; and then has psrlq read that stack slot directly as its memory operand (the
; "8-byte Folded Reload").
1046define x86_mmx @stack_fold_psrlq(x86_mmx %a, x86_mmx %b) {
1047; CHECK-LABEL: stack_fold_psrlq:
1048; CHECK:       # %bb.0:
1049; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1050; CHECK-NEXT:    #APP
1051; CHECK-NEXT:    nop
1052; CHECK-NEXT:    #NO_APP
1053; CHECK-NEXT:    psrlq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1054; CHECK-NEXT:    movq2dq %mm0, %xmm0
1055; CHECK-NEXT:    retq
1056  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1057  %2 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a, x86_mmx %b) nounwind readnone
1058  ret x86_mmx %2
1059}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1060declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
1061
; Stack-folding test for psrlw (both operands passed as x86_mmx). The
; side-effecting "nop" inline asm returns an x86_mmx value through a "=y"
; output and clobbers mm2-mm7. The expected assembly spills %mm1 before the asm
; and then has psrlw read that stack slot directly as its memory operand (the
; "8-byte Folded Reload").
1062define x86_mmx @stack_fold_psrlw(x86_mmx %a, x86_mmx %b) {
1063; CHECK-LABEL: stack_fold_psrlw:
1064; CHECK:       # %bb.0:
1065; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1066; CHECK-NEXT:    #APP
1067; CHECK-NEXT:    nop
1068; CHECK-NEXT:    #NO_APP
1069; CHECK-NEXT:    psrlw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1070; CHECK-NEXT:    movq2dq %mm0, %xmm0
1071; CHECK-NEXT:    retq
1072  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1073  %2 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a, x86_mmx %b) nounwind readnone
1074  ret x86_mmx %2
1075}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1076declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
1077
; Stack-folding test for psubb. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has psubb read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
1078define x86_mmx @stack_fold_psubb(x86_mmx %a, x86_mmx %b) {
1079; CHECK-LABEL: stack_fold_psubb:
1080; CHECK:       # %bb.0:
1081; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1082; CHECK-NEXT:    #APP
1083; CHECK-NEXT:    nop
1084; CHECK-NEXT:    #NO_APP
1085; CHECK-NEXT:    psubb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1086; CHECK-NEXT:    movq2dq %mm0, %xmm0
1087; CHECK-NEXT:    retq
1088  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1089  %2 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a, x86_mmx %b) nounwind readnone
1090  ret x86_mmx %2
1091}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1092declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
1093
; Stack-folding test for psubd. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has psubd read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
1094define x86_mmx @stack_fold_psubd(x86_mmx %a, x86_mmx %b) {
1095; CHECK-LABEL: stack_fold_psubd:
1096; CHECK:       # %bb.0:
1097; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1098; CHECK-NEXT:    #APP
1099; CHECK-NEXT:    nop
1100; CHECK-NEXT:    #NO_APP
1101; CHECK-NEXT:    psubd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1102; CHECK-NEXT:    movq2dq %mm0, %xmm0
1103; CHECK-NEXT:    retq
1104  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1105  %2 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
1106  ret x86_mmx %2
1107}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1108declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
1109
; Stack-folding test for psubq. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has psubq read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
1110define x86_mmx @stack_fold_psubq(x86_mmx %a, x86_mmx %b) {
1111; CHECK-LABEL: stack_fold_psubq:
1112; CHECK:       # %bb.0:
1113; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1114; CHECK-NEXT:    #APP
1115; CHECK-NEXT:    nop
1116; CHECK-NEXT:    #NO_APP
1117; CHECK-NEXT:    psubq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1118; CHECK-NEXT:    movq2dq %mm0, %xmm0
1119; CHECK-NEXT:    retq
1120  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1121  %2 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a, x86_mmx %b) nounwind readnone
1122  ret x86_mmx %2
1123}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1124declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
1125
; Stack-folding test for psubsb. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has psubsb read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
1126define x86_mmx @stack_fold_psubsb(x86_mmx %a, x86_mmx %b) {
1127; CHECK-LABEL: stack_fold_psubsb:
1128; CHECK:       # %bb.0:
1129; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1130; CHECK-NEXT:    #APP
1131; CHECK-NEXT:    nop
1132; CHECK-NEXT:    #NO_APP
1133; CHECK-NEXT:    psubsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1134; CHECK-NEXT:    movq2dq %mm0, %xmm0
1135; CHECK-NEXT:    retq
1136  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1137  %2 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a, x86_mmx %b) nounwind readnone
1138  ret x86_mmx %2
1139}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1140declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
1141
; Stack-folding test for psubsw. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has psubsw read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
1142define x86_mmx @stack_fold_psubsw(x86_mmx %a, x86_mmx %b) {
1143; CHECK-LABEL: stack_fold_psubsw:
1144; CHECK:       # %bb.0:
1145; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1146; CHECK-NEXT:    #APP
1147; CHECK-NEXT:    nop
1148; CHECK-NEXT:    #NO_APP
1149; CHECK-NEXT:    psubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1150; CHECK-NEXT:    movq2dq %mm0, %xmm0
1151; CHECK-NEXT:    retq
1152  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1153  %2 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
1154  ret x86_mmx %2
1155}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1156declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
1157
; Stack-folding test for psubusb. The side-effecting "nop" inline asm returns
; an x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has psubusb read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
1158define x86_mmx @stack_fold_psubusb(x86_mmx %a, x86_mmx %b) {
1159; CHECK-LABEL: stack_fold_psubusb:
1160; CHECK:       # %bb.0:
1161; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1162; CHECK-NEXT:    #APP
1163; CHECK-NEXT:    nop
1164; CHECK-NEXT:    #NO_APP
1165; CHECK-NEXT:    psubusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1166; CHECK-NEXT:    movq2dq %mm0, %xmm0
1167; CHECK-NEXT:    retq
1168  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1169  %2 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
1170  ret x86_mmx %2
1171}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1172declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
1173
; Stack-folding test for psubusw. The side-effecting "nop" inline asm returns
; an x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has psubusw read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
1174define x86_mmx @stack_fold_psubusw(x86_mmx %a, x86_mmx %b) {
1175; CHECK-LABEL: stack_fold_psubusw:
1176; CHECK:       # %bb.0:
1177; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1178; CHECK-NEXT:    #APP
1179; CHECK-NEXT:    nop
1180; CHECK-NEXT:    #NO_APP
1181; CHECK-NEXT:    psubusw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1182; CHECK-NEXT:    movq2dq %mm0, %xmm0
1183; CHECK-NEXT:    retq
1184  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1185  %2 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
1186  ret x86_mmx %2
1187}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1188declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
1189
; Stack-folding test for psubw. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has psubw read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
1190define x86_mmx @stack_fold_psubw(x86_mmx %a, x86_mmx %b) {
1191; CHECK-LABEL: stack_fold_psubw:
1192; CHECK:       # %bb.0:
1193; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1194; CHECK-NEXT:    #APP
1195; CHECK-NEXT:    nop
1196; CHECK-NEXT:    #NO_APP
1197; CHECK-NEXT:    psubw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1198; CHECK-NEXT:    movq2dq %mm0, %xmm0
1199; CHECK-NEXT:    retq
1200  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1201  %2 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
1202  ret x86_mmx %2
1203}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1204declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
1205
; Stack-folding test for punpckhbw. The side-effecting "nop" inline asm returns
; an x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm, has punpckhbw read that stack slot
; directly as its memory operand (the "8-byte Folded Reload"), and pins the
; printed shuffle decode comment (interleaving elements 4..7 of mm0 and mem).
1206define x86_mmx @stack_fold_punpckhbw(x86_mmx %a, x86_mmx %b) {
1207; CHECK-LABEL: stack_fold_punpckhbw:
1208; CHECK:       # %bb.0:
1209; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1210; CHECK-NEXT:    #APP
1211; CHECK-NEXT:    nop
1212; CHECK-NEXT:    #NO_APP
1213; CHECK-NEXT:    punpckhbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1214; CHECK-NEXT:    # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
1215; CHECK-NEXT:    movq2dq %mm0, %xmm0
1216; CHECK-NEXT:    retq
1217  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1218  %2 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a, x86_mmx %b) nounwind readnone
1219  ret x86_mmx %2
1220}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1221declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
1222
; Stack-folding test for punpckhdq. The side-effecting "nop" inline asm returns
; an x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm, has punpckhdq read that stack slot
; directly as its memory operand (the "8-byte Folded Reload"), and pins the
; printed shuffle decode comment (mm0 = mm0[1],mem[1]).
1223define x86_mmx @stack_fold_punpckhdq(x86_mmx %a, x86_mmx %b) {
1224; CHECK-LABEL: stack_fold_punpckhdq:
1225; CHECK:       # %bb.0:
1226; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1227; CHECK-NEXT:    #APP
1228; CHECK-NEXT:    nop
1229; CHECK-NEXT:    #NO_APP
1230; CHECK-NEXT:    punpckhdq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1231; CHECK-NEXT:    # mm0 = mm0[1],mem[1]
1232; CHECK-NEXT:    movq2dq %mm0, %xmm0
1233; CHECK-NEXT:    retq
1234  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1235  %2 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a, x86_mmx %b) nounwind readnone
1236  ret x86_mmx %2
1237}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1238declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
1239
; Stack-folding test for punpckhwd. The side-effecting "nop" inline asm returns
; an x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm, has punpckhwd read that stack slot
; directly as its memory operand (the "8-byte Folded Reload"), and pins the
; printed shuffle decode comment (mm0 = mm0[2],mem[2],mm0[3],mem[3]).
1240define x86_mmx @stack_fold_punpckhwd(x86_mmx %a, x86_mmx %b) {
1241; CHECK-LABEL: stack_fold_punpckhwd:
1242; CHECK:       # %bb.0:
1243; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1244; CHECK-NEXT:    #APP
1245; CHECK-NEXT:    nop
1246; CHECK-NEXT:    #NO_APP
1247; CHECK-NEXT:    punpckhwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1248; CHECK-NEXT:    # mm0 = mm0[2],mem[2],mm0[3],mem[3]
1249; CHECK-NEXT:    movq2dq %mm0, %xmm0
1250; CHECK-NEXT:    retq
1251  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1252  %2 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a, x86_mmx %b) nounwind readnone
1253  ret x86_mmx %2
1254}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1255declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
1256
; Stack-folding test for punpcklbw. The side-effecting "nop" inline asm returns
; an x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm, has punpcklbw read that stack slot
; directly as its memory operand (the "8-byte Folded Reload"), and pins the
; printed shuffle decode comment (interleaving elements 0..3 of mm0 and mem).
1257define x86_mmx @stack_fold_punpcklbw(x86_mmx %a, x86_mmx %b) {
1258; CHECK-LABEL: stack_fold_punpcklbw:
1259; CHECK:       # %bb.0:
1260; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1261; CHECK-NEXT:    #APP
1262; CHECK-NEXT:    nop
1263; CHECK-NEXT:    #NO_APP
1264; CHECK-NEXT:    punpcklbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1265; CHECK-NEXT:    # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
1266; CHECK-NEXT:    movq2dq %mm0, %xmm0
1267; CHECK-NEXT:    retq
1268  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1269  %2 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a, x86_mmx %b) nounwind readnone
1270  ret x86_mmx %2
1271}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1272declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
1273
; Stack-folding test for punpckldq. The side-effecting "nop" inline asm returns
; an x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm, has punpckldq read that stack slot
; directly as its memory operand (the "8-byte Folded Reload"), and pins the
; printed shuffle decode comment (mm0 = mm0[0],mem[0]).
1274define x86_mmx @stack_fold_punpckldq(x86_mmx %a, x86_mmx %b) {
1275; CHECK-LABEL: stack_fold_punpckldq:
1276; CHECK:       # %bb.0:
1277; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1278; CHECK-NEXT:    #APP
1279; CHECK-NEXT:    nop
1280; CHECK-NEXT:    #NO_APP
1281; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1282; CHECK-NEXT:    # mm0 = mm0[0],mem[0]
1283; CHECK-NEXT:    movq2dq %mm0, %xmm0
1284; CHECK-NEXT:    retq
1285  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1286  %2 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a, x86_mmx %b) nounwind readnone
1287  ret x86_mmx %2
1288}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1289declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
1290
; Stack-folding test for punpcklwd. The side-effecting "nop" inline asm returns
; an x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm, has punpcklwd read that stack slot
; directly as its memory operand (the "8-byte Folded Reload"), and pins the
; printed shuffle decode comment (mm0 = mm0[0],mem[0],mm0[1],mem[1]).
1291define x86_mmx @stack_fold_punpcklwd(x86_mmx %a, x86_mmx %b) {
1292; CHECK-LABEL: stack_fold_punpcklwd:
1293; CHECK:       # %bb.0:
1294; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1295; CHECK-NEXT:    #APP
1296; CHECK-NEXT:    nop
1297; CHECK-NEXT:    #NO_APP
1298; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1299; CHECK-NEXT:    # mm0 = mm0[0],mem[0],mm0[1],mem[1]
1300; CHECK-NEXT:    movq2dq %mm0, %xmm0
1301; CHECK-NEXT:    retq
1302  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1303  %2 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a, x86_mmx %b) nounwind readnone
1304  ret x86_mmx %2
1305}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1306declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
1307
; Stack-folding test for pxor. The side-effecting "nop" inline asm returns an
; x86_mmx value through a "=y" output and clobbers mm2-mm7. The expected
; assembly spills %mm1 before the asm and then has pxor read that stack slot
; directly as its memory operand (the "8-byte Folded Reload").
1308define x86_mmx @stack_fold_pxor(x86_mmx %a, x86_mmx %b) {
1309; CHECK-LABEL: stack_fold_pxor:
1310; CHECK:       # %bb.0:
1311; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1312; CHECK-NEXT:    #APP
1313; CHECK-NEXT:    nop
1314; CHECK-NEXT:    #NO_APP
1315; CHECK-NEXT:    pxor {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
1316; CHECK-NEXT:    movq2dq %mm0, %xmm0
1317; CHECK-NEXT:    retq
1318  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
1319  %2 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a, x86_mmx %b) nounwind readnone
1320  ret x86_mmx %2
1321}
; Intrinsic called by the test: (x86_mmx, x86_mmx) -> x86_mmx.
1322declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
1323