1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
3; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
4
5declare  i4  @llvm.umul.fix.sat.i4   (i4,  i4, i32)
6declare  i32 @llvm.umul.fix.sat.i32  (i32, i32, i32)
7declare  i64 @llvm.umul.fix.sat.i64  (i64, i64, i32)
8declare  <4 x i32> @llvm.umul.fix.sat.v4i32(<4 x i32>, <4 x i32>, i32)
9
; Saturating unsigned fixed-point multiply, i32 with scale 2:
; result = (zext(x) * zext(y)) >> 2, clamped to UINT32_MAX when the
; shifted product does not fit in 32 bits (high part >= 4).
10define i32 @func(i32 %x, i32 %y) nounwind {
11; X64-LABEL: func:
12; X64:       # %bb.0:
13; X64-NEXT:    movl %esi, %eax
14; X64-NEXT:    movl %edi, %ecx
15; X64-NEXT:    imulq %rax, %rcx
16; X64-NEXT:    movq %rcx, %rax
17; X64-NEXT:    shrq $32, %rax
18; X64-NEXT:    shrdl $2, %eax, %ecx
19; X64-NEXT:    cmpl $4, %eax
20; X64-NEXT:    movl $-1, %eax
21; X64-NEXT:    cmovbl %ecx, %eax
22; X64-NEXT:    retq
23;
24; X86-LABEL: func:
25; X86:       # %bb.0:
26; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
27; X86-NEXT:    mull {{[0-9]+}}(%esp)
28; X86-NEXT:    shrdl $2, %edx, %eax
29; X86-NEXT:    cmpl $4, %edx
30; X86-NEXT:    movl $-1, %ecx
31; X86-NEXT:    cmovael %ecx, %eax
32; X86-NEXT:    retl
33  %tmp = call i32 @llvm.umul.fix.sat.i32(i32 %x, i32 %y, i32 2)
34  ret i32 %tmp
35}
36
; Saturating unsigned fixed-point multiply, i64 with scale 2.
; On x86-64 this is a single mulq plus shrd/cmov; on i686 the 64x64->128
; multiply is expanded into four 32x32 mull's with carry propagation, and
; saturation is selected with cmovne/cmove on the overflow bits.
37define i64 @func2(i64 %x, i64 %y) nounwind {
38; X64-LABEL: func2:
39; X64:       # %bb.0:
40; X64-NEXT:    movq %rdi, %rax
41; X64-NEXT:    mulq %rsi
42; X64-NEXT:    shrdq $2, %rdx, %rax
43; X64-NEXT:    cmpq $4, %rdx
44; X64-NEXT:    movq $-1, %rcx
45; X64-NEXT:    cmovaeq %rcx, %rax
46; X64-NEXT:    retq
47;
48; X86-LABEL: func2:
49; X86:       # %bb.0:
50; X86-NEXT:    pushl %ebp
51; X86-NEXT:    pushl %ebx
52; X86-NEXT:    pushl %edi
53; X86-NEXT:    pushl %esi
54; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
55; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
56; X86-NEXT:    movl %ecx, %eax
57; X86-NEXT:    mull %esi
58; X86-NEXT:    movl %edx, %edi
59; X86-NEXT:    movl %eax, %ebx
60; X86-NEXT:    movl %ecx, %eax
61; X86-NEXT:    mull {{[0-9]+}}(%esp)
62; X86-NEXT:    movl %eax, %ecx
63; X86-NEXT:    movl %edx, %ebp
64; X86-NEXT:    addl %ebx, %ebp
65; X86-NEXT:    adcl $0, %edi
66; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
67; X86-NEXT:    mull %esi
68; X86-NEXT:    movl %edx, %ebx
69; X86-NEXT:    movl %eax, %esi
70; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
71; X86-NEXT:    mull {{[0-9]+}}(%esp)
72; X86-NEXT:    addl %ebp, %eax
73; X86-NEXT:    adcl %edi, %edx
74; X86-NEXT:    adcl $0, %ebx
75; X86-NEXT:    addl %esi, %edx
76; X86-NEXT:    adcl $0, %ebx
77; X86-NEXT:    shrdl $2, %eax, %ecx
78; X86-NEXT:    shrdl $2, %edx, %eax
79; X86-NEXT:    shrl $2, %edx
80; X86-NEXT:    orl %ebx, %edx
81; X86-NEXT:    movl $-1, %edx
82; X86-NEXT:    cmovnel %edx, %ecx
83; X86-NEXT:    cmovel %eax, %edx
84; X86-NEXT:    movl %ecx, %eax
85; X86-NEXT:    popl %esi
86; X86-NEXT:    popl %edi
87; X86-NEXT:    popl %ebx
88; X86-NEXT:    popl %ebp
89; X86-NEXT:    retl
90  %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 2)
91  ret i64 %tmp
92}
93
; Saturating unsigned fixed-point multiply on an illegal type, i4 with
; scale 2. The i4 operands are promoted to i8: one is masked with
; `and 15`, the other is shifted left 4 so the i4 value sits in the high
; nibble; the final `shrb $4` moves the saturated result back down.
94define i4 @func3(i4 %x, i4 %y) nounwind {
95; X64-LABEL: func3:
96; X64:       # %bb.0:
97; X64-NEXT:    andl $15, %esi
98; X64-NEXT:    shlb $4, %dil
99; X64-NEXT:    movzbl %dil, %eax
100; X64-NEXT:    imull %esi, %eax
101; X64-NEXT:    movl %eax, %ecx
102; X64-NEXT:    shrb $2, %cl
103; X64-NEXT:    shrl $8, %eax
104; X64-NEXT:    movl %eax, %edx
105; X64-NEXT:    shlb $6, %dl
106; X64-NEXT:    orb %cl, %dl
107; X64-NEXT:    movzbl %dl, %ecx
108; X64-NEXT:    cmpb $4, %al
109; X64-NEXT:    movl $255, %eax
110; X64-NEXT:    cmovbl %ecx, %eax
111; X64-NEXT:    shrb $4, %al
112; X64-NEXT:    # kill: def $al killed $al killed $eax
113; X64-NEXT:    retq
114;
115; X86-LABEL: func3:
116; X86:       # %bb.0:
117; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
118; X86-NEXT:    andb $15, %al
119; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
120; X86-NEXT:    movzbl %al, %edx
121; X86-NEXT:    shlb $4, %cl
122; X86-NEXT:    movzbl %cl, %eax
123; X86-NEXT:    imull %edx, %eax
124; X86-NEXT:    movb %ah, %cl
125; X86-NEXT:    shlb $6, %cl
126; X86-NEXT:    shrb $2, %al
127; X86-NEXT:    orb %cl, %al
128; X86-NEXT:    movzbl %al, %ecx
129; X86-NEXT:    cmpb $4, %ah
130; X86-NEXT:    movl $255, %eax
131; X86-NEXT:    cmovbl %ecx, %eax
132; X86-NEXT:    shrb $4, %al
133; X86-NEXT:    # kill: def $al killed $al killed $eax
134; X86-NEXT:    retl
135  %tmp = call i4 @llvm.umul.fix.sat.i4(i4 %x, i4 %y, i32 2)
136  ret i4 %tmp
137}
138
; Vector saturating unsigned fixed-point multiply, <4 x i32> with scale 2.
; There is no vector lowering here: both targets scalarize and run the
; same per-lane sequence as @func on each of the four elements, then
; reassemble the vector (punpck on X64; stores through the sret pointer
; in %edi on X86).
139define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
140; X64-LABEL: vec:
141; X64:       # %bb.0:
142; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
143; X64-NEXT:    movd %xmm2, %eax
144; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
145; X64-NEXT:    movd %xmm2, %ecx
146; X64-NEXT:    imulq %rax, %rcx
147; X64-NEXT:    movq %rcx, %rax
148; X64-NEXT:    shrq $32, %rax
149; X64-NEXT:    shrdl $2, %eax, %ecx
150; X64-NEXT:    cmpl $4, %eax
151; X64-NEXT:    movl $-1, %eax
152; X64-NEXT:    cmovael %eax, %ecx
153; X64-NEXT:    movd %ecx, %xmm2
154; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
155; X64-NEXT:    movd %xmm3, %ecx
156; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
157; X64-NEXT:    movd %xmm3, %edx
158; X64-NEXT:    imulq %rcx, %rdx
159; X64-NEXT:    movq %rdx, %rcx
160; X64-NEXT:    shrq $32, %rcx
161; X64-NEXT:    shrdl $2, %ecx, %edx
162; X64-NEXT:    cmpl $4, %ecx
163; X64-NEXT:    cmovael %eax, %edx
164; X64-NEXT:    movd %edx, %xmm3
165; X64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
166; X64-NEXT:    movd %xmm1, %ecx
167; X64-NEXT:    movd %xmm0, %edx
168; X64-NEXT:    imulq %rcx, %rdx
169; X64-NEXT:    movq %rdx, %rcx
170; X64-NEXT:    shrq $32, %rcx
171; X64-NEXT:    shrdl $2, %ecx, %edx
172; X64-NEXT:    cmpl $4, %ecx
173; X64-NEXT:    cmovael %eax, %edx
174; X64-NEXT:    movd %edx, %xmm2
175; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
176; X64-NEXT:    movd %xmm1, %ecx
177; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
178; X64-NEXT:    movd %xmm0, %edx
179; X64-NEXT:    imulq %rcx, %rdx
180; X64-NEXT:    movq %rdx, %rcx
181; X64-NEXT:    shrq $32, %rcx
182; X64-NEXT:    shrdl $2, %ecx, %edx
183; X64-NEXT:    cmpl $4, %ecx
184; X64-NEXT:    cmovael %eax, %edx
185; X64-NEXT:    movd %edx, %xmm0
186; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
187; X64-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
188; X64-NEXT:    movdqa %xmm2, %xmm0
189; X64-NEXT:    retq
190;
191; X86-LABEL: vec:
192; X86:       # %bb.0:
193; X86-NEXT:    pushl %ebp
194; X86-NEXT:    pushl %ebx
195; X86-NEXT:    pushl %edi
196; X86-NEXT:    pushl %esi
197; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
198; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
199; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
200; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
201; X86-NEXT:    mull {{[0-9]+}}(%esp)
202; X86-NEXT:    movl %eax, %esi
203; X86-NEXT:    shrdl $2, %edx, %esi
204; X86-NEXT:    cmpl $4, %edx
205; X86-NEXT:    movl $-1, %ecx
206; X86-NEXT:    cmovael %ecx, %esi
207; X86-NEXT:    movl %ebp, %eax
208; X86-NEXT:    mull {{[0-9]+}}(%esp)
209; X86-NEXT:    movl %eax, %ebp
210; X86-NEXT:    shrdl $2, %edx, %ebp
211; X86-NEXT:    cmpl $4, %edx
212; X86-NEXT:    cmovael %ecx, %ebp
213; X86-NEXT:    movl %ebx, %eax
214; X86-NEXT:    mull {{[0-9]+}}(%esp)
215; X86-NEXT:    movl %eax, %ebx
216; X86-NEXT:    shrdl $2, %edx, %ebx
217; X86-NEXT:    cmpl $4, %edx
218; X86-NEXT:    cmovael %ecx, %ebx
219; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
220; X86-NEXT:    mull {{[0-9]+}}(%esp)
221; X86-NEXT:    shrdl $2, %edx, %eax
222; X86-NEXT:    cmpl $4, %edx
223; X86-NEXT:    cmovael %ecx, %eax
224; X86-NEXT:    movl %eax, 12(%edi)
225; X86-NEXT:    movl %ebx, 8(%edi)
226; X86-NEXT:    movl %ebp, 4(%edi)
227; X86-NEXT:    movl %esi, (%edi)
228; X86-NEXT:    movl %edi, %eax
229; X86-NEXT:    popl %esi
230; X86-NEXT:    popl %edi
231; X86-NEXT:    popl %ebx
232; X86-NEXT:    popl %ebp
233; X86-NEXT:    retl $4
234  %tmp = call <4 x i32> @llvm.umul.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 2)
235  ret <4 x i32> %tmp
236}
237
238; These result in regular integer multiplication
; Scale 0 case: degenerates to a plain unsigned multiply that saturates
; to UINT32_MAX on overflow, so both targets lower to mull + cmovo with
; no shift at all.
239define i32 @func4(i32 %x, i32 %y) nounwind {
240; X64-LABEL: func4:
241; X64:       # %bb.0:
242; X64-NEXT:    movl %edi, %eax
243; X64-NEXT:    mull %esi
244; X64-NEXT:    movl $-1, %ecx
245; X64-NEXT:    cmovol %ecx, %eax
246; X64-NEXT:    retq
247;
248; X86-LABEL: func4:
249; X86:       # %bb.0:
250; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
251; X86-NEXT:    mull {{[0-9]+}}(%esp)
252; X86-NEXT:    movl $-1, %ecx
253; X86-NEXT:    cmovol %ecx, %eax
254; X86-NEXT:    retl
255  %tmp = call i32 @llvm.umul.fix.sat.i32(i32 %x, i32 %y, i32 0)
256  ret i32 %tmp
257}
258
; Scale 0, i64: plain saturating unsigned multiply. Note this function
; deliberately lacks `nounwind` (unlike the others), so the X86 checks
; also cover the .cfi_* unwind directives. The i686 expansion tracks
; overflow across the partial 32x32 products via seto/setb and or's the
; flags together before the cmovne saturation.
259define i64 @func5(i64 %x, i64 %y) {
260; X64-LABEL: func5:
261; X64:       # %bb.0:
262; X64-NEXT:    movq %rdi, %rax
263; X64-NEXT:    mulq %rsi
264; X64-NEXT:    movq $-1, %rcx
265; X64-NEXT:    cmovoq %rcx, %rax
266; X64-NEXT:    retq
267;
268; X86-LABEL: func5:
269; X86:       # %bb.0:
270; X86-NEXT:    pushl %ebp
271; X86-NEXT:    .cfi_def_cfa_offset 8
272; X86-NEXT:    pushl %ebx
273; X86-NEXT:    .cfi_def_cfa_offset 12
274; X86-NEXT:    pushl %edi
275; X86-NEXT:    .cfi_def_cfa_offset 16
276; X86-NEXT:    pushl %esi
277; X86-NEXT:    .cfi_def_cfa_offset 20
278; X86-NEXT:    .cfi_offset %esi, -20
279; X86-NEXT:    .cfi_offset %edi, -16
280; X86-NEXT:    .cfi_offset %ebx, -12
281; X86-NEXT:    .cfi_offset %ebp, -8
282; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
283; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
284; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
285; X86-NEXT:    testl %esi, %esi
286; X86-NEXT:    setne %dl
287; X86-NEXT:    testl %eax, %eax
288; X86-NEXT:    setne %cl
289; X86-NEXT:    andb %dl, %cl
290; X86-NEXT:    mull {{[0-9]+}}(%esp)
291; X86-NEXT:    movl %eax, %edi
292; X86-NEXT:    seto %bl
293; X86-NEXT:    movl %esi, %eax
294; X86-NEXT:    mull %ebp
295; X86-NEXT:    movl %eax, %esi
296; X86-NEXT:    seto %ch
297; X86-NEXT:    orb %bl, %ch
298; X86-NEXT:    addl %edi, %esi
299; X86-NEXT:    movl %ebp, %eax
300; X86-NEXT:    mull {{[0-9]+}}(%esp)
301; X86-NEXT:    addl %esi, %edx
302; X86-NEXT:    setb %bl
303; X86-NEXT:    orb %ch, %bl
304; X86-NEXT:    orb %cl, %bl
305; X86-NEXT:    movl $-1, %ecx
306; X86-NEXT:    cmovnel %ecx, %eax
307; X86-NEXT:    cmovnel %ecx, %edx
308; X86-NEXT:    popl %esi
309; X86-NEXT:    .cfi_def_cfa_offset 16
310; X86-NEXT:    popl %edi
311; X86-NEXT:    .cfi_def_cfa_offset 12
312; X86-NEXT:    popl %ebx
313; X86-NEXT:    .cfi_def_cfa_offset 8
314; X86-NEXT:    popl %ebp
315; X86-NEXT:    .cfi_def_cfa_offset 4
316; X86-NEXT:    retl
317  %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 0)
318  ret i64 %tmp
319}
320
; Scale 0 on the illegal i4 type: same high-nibble promotion trick as
; @func3 (mask one operand with 15, shift the other left 4), but the
; saturation check collapses to mulb + cmovno since no fractional shift
; is needed.
321define i4 @func6(i4 %x, i4 %y) nounwind {
322; X64-LABEL: func6:
323; X64:       # %bb.0:
324; X64-NEXT:    movl %edi, %eax
325; X64-NEXT:    andb $15, %sil
326; X64-NEXT:    shlb $4, %al
327; X64-NEXT:    # kill: def $al killed $al killed $eax
328; X64-NEXT:    mulb %sil
329; X64-NEXT:    movzbl %al, %ecx
330; X64-NEXT:    movl $255, %eax
331; X64-NEXT:    cmovnol %ecx, %eax
332; X64-NEXT:    shrb $4, %al
333; X64-NEXT:    # kill: def $al killed $al killed $eax
334; X64-NEXT:    retq
335;
336; X86-LABEL: func6:
337; X86:       # %bb.0:
338; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
339; X86-NEXT:    andb $15, %cl
340; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
341; X86-NEXT:    shlb $4, %al
342; X86-NEXT:    mulb %cl
343; X86-NEXT:    movzbl %al, %ecx
344; X86-NEXT:    movl $255, %eax
345; X86-NEXT:    cmovnol %ecx, %eax
346; X86-NEXT:    shrb $4, %al
347; X86-NEXT:    # kill: def $al killed $al killed $eax
348; X86-NEXT:    retl
349  %tmp = call i4 @llvm.umul.fix.sat.i4(i4 %x, i4 %y, i32 0)
350  ret i4 %tmp
351}
352
; Scale 0, <4 x i32>: scalarized into four mull + cmovol sequences (the
; per-lane pattern of @func4), then the lanes are reassembled — punpck
; shuffles on X64, stores through the sret pointer in %ecx on X86.
353define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
354; X64-LABEL: vec2:
355; X64:       # %bb.0:
356; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
357; X64-NEXT:    movd %xmm2, %eax
358; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
359; X64-NEXT:    movd %xmm2, %ecx
360; X64-NEXT:    mull %ecx
361; X64-NEXT:    movl $-1, %ecx
362; X64-NEXT:    cmovol %ecx, %eax
363; X64-NEXT:    movd %eax, %xmm2
364; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
365; X64-NEXT:    movd %xmm3, %eax
366; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
367; X64-NEXT:    movd %xmm3, %edx
368; X64-NEXT:    mull %edx
369; X64-NEXT:    cmovol %ecx, %eax
370; X64-NEXT:    movd %eax, %xmm3
371; X64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
372; X64-NEXT:    movd %xmm0, %eax
373; X64-NEXT:    movd %xmm1, %edx
374; X64-NEXT:    mull %edx
375; X64-NEXT:    cmovol %ecx, %eax
376; X64-NEXT:    movd %eax, %xmm2
377; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
378; X64-NEXT:    movd %xmm0, %eax
379; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
380; X64-NEXT:    movd %xmm0, %edx
381; X64-NEXT:    mull %edx
382; X64-NEXT:    cmovol %ecx, %eax
383; X64-NEXT:    movd %eax, %xmm0
384; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
385; X64-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
386; X64-NEXT:    movdqa %xmm2, %xmm0
387; X64-NEXT:    retq
388;
389; X86-LABEL: vec2:
390; X86:       # %bb.0:
391; X86-NEXT:    pushl %ebp
392; X86-NEXT:    pushl %ebx
393; X86-NEXT:    pushl %edi
394; X86-NEXT:    pushl %esi
395; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
396; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
397; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
398; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
399; X86-NEXT:    mull {{[0-9]+}}(%esp)
400; X86-NEXT:    movl %eax, %ebp
401; X86-NEXT:    movl $-1, %esi
402; X86-NEXT:    cmovol %esi, %ebp
403; X86-NEXT:    movl %ebx, %eax
404; X86-NEXT:    mull {{[0-9]+}}(%esp)
405; X86-NEXT:    movl %eax, %ebx
406; X86-NEXT:    cmovol %esi, %ebx
407; X86-NEXT:    movl %edi, %eax
408; X86-NEXT:    mull {{[0-9]+}}(%esp)
409; X86-NEXT:    movl %eax, %edi
410; X86-NEXT:    cmovol %esi, %edi
411; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
412; X86-NEXT:    mull {{[0-9]+}}(%esp)
413; X86-NEXT:    cmovol %esi, %eax
414; X86-NEXT:    movl %eax, 12(%ecx)
415; X86-NEXT:    movl %edi, 8(%ecx)
416; X86-NEXT:    movl %ebx, 4(%ecx)
417; X86-NEXT:    movl %ebp, (%ecx)
418; X86-NEXT:    movl %ecx, %eax
419; X86-NEXT:    popl %esi
420; X86-NEXT:    popl %edi
421; X86-NEXT:    popl %ebx
422; X86-NEXT:    popl %ebp
423; X86-NEXT:    retl $4
424  %tmp = call <4 x i32> @llvm.umul.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0)
425  ret <4 x i32> %tmp
426}
427
; Scale 32 (half the i64 width): the result is the middle 64 bits of the
; 128-bit product. X64 uses shrdq $32; on i686 the shift falls out of the
; partial-product arrangement for free, and saturation is materialized
; branchlessly with cmpl/sbbl/notl producing an all-ones-or-zero mask
; that is or'ed into the result.
428define i64 @func7(i64 %x, i64 %y) nounwind {
429; X64-LABEL: func7:
430; X64:       # %bb.0:
431; X64-NEXT:    movq %rdi, %rax
432; X64-NEXT:    mulq %rsi
433; X64-NEXT:    shrdq $32, %rdx, %rax
434; X64-NEXT:    movl $4294967295, %ecx # imm = 0xFFFFFFFF
435; X64-NEXT:    cmpq %rcx, %rdx
436; X64-NEXT:    movq $-1, %rcx
437; X64-NEXT:    cmovaq %rcx, %rax
438; X64-NEXT:    retq
439;
440; X86-LABEL: func7:
441; X86:       # %bb.0:
442; X86-NEXT:    pushl %ebp
443; X86-NEXT:    pushl %ebx
444; X86-NEXT:    pushl %edi
445; X86-NEXT:    pushl %esi
446; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
447; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
448; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
449; X86-NEXT:    movl %ecx, %eax
450; X86-NEXT:    mull %ebp
451; X86-NEXT:    movl %edx, %edi
452; X86-NEXT:    movl %eax, %ebx
453; X86-NEXT:    movl %ecx, %eax
454; X86-NEXT:    mull %esi
455; X86-NEXT:    movl %edx, %ecx
456; X86-NEXT:    addl %ebx, %ecx
457; X86-NEXT:    adcl $0, %edi
458; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
459; X86-NEXT:    mull %ebp
460; X86-NEXT:    movl %edx, %ebx
461; X86-NEXT:    movl %eax, %ebp
462; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
463; X86-NEXT:    mull %esi
464; X86-NEXT:    addl %ecx, %eax
465; X86-NEXT:    adcl %edi, %edx
466; X86-NEXT:    adcl $0, %ebx
467; X86-NEXT:    addl %ebp, %edx
468; X86-NEXT:    adcl $0, %ebx
469; X86-NEXT:    xorl %ecx, %ecx
470; X86-NEXT:    cmpl $1, %ebx
471; X86-NEXT:    sbbl %ecx, %ecx
472; X86-NEXT:    notl %ecx
473; X86-NEXT:    orl %ecx, %eax
474; X86-NEXT:    orl %ecx, %edx
475; X86-NEXT:    popl %esi
476; X86-NEXT:    popl %edi
477; X86-NEXT:    popl %ebx
478; X86-NEXT:    popl %ebp
479; X86-NEXT:    retl
480  %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 32)
481  ret i64 %tmp
482}
483
; Maximum unsigned scale for i64 (63 of 64 bits): the result is bits
; [126:63] of the 128-bit product. X64 needs shrdq $63 plus a compare
; against INT64_MAX; the i686 version shifts across the three upper
; 32-bit limbs with shrdl/shldl and uses the same sbbl/notl mask trick
; as @func7 for branchless saturation.
484define i64 @func8(i64 %x, i64 %y) nounwind {
485; X64-LABEL: func8:
486; X64:       # %bb.0:
487; X64-NEXT:    movq %rdi, %rax
488; X64-NEXT:    mulq %rsi
489; X64-NEXT:    shrdq $63, %rdx, %rax
490; X64-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
491; X64-NEXT:    cmpq %rcx, %rdx
492; X64-NEXT:    movq $-1, %rcx
493; X64-NEXT:    cmovaq %rcx, %rax
494; X64-NEXT:    retq
495;
496; X86-LABEL: func8:
497; X86:       # %bb.0:
498; X86-NEXT:    pushl %ebp
499; X86-NEXT:    pushl %ebx
500; X86-NEXT:    pushl %edi
501; X86-NEXT:    pushl %esi
502; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
503; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
504; X86-NEXT:    movl %ecx, %eax
505; X86-NEXT:    mull %esi
506; X86-NEXT:    movl %edx, %edi
507; X86-NEXT:    movl %eax, %ebx
508; X86-NEXT:    movl %ecx, %eax
509; X86-NEXT:    mull {{[0-9]+}}(%esp)
510; X86-NEXT:    movl %edx, %ebp
511; X86-NEXT:    addl %ebx, %ebp
512; X86-NEXT:    adcl $0, %edi
513; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
514; X86-NEXT:    mull %esi
515; X86-NEXT:    movl %edx, %ecx
516; X86-NEXT:    movl %eax, %ebx
517; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
518; X86-NEXT:    mull {{[0-9]+}}(%esp)
519; X86-NEXT:    addl %ebp, %eax
520; X86-NEXT:    adcl %edi, %edx
521; X86-NEXT:    adcl $0, %ecx
522; X86-NEXT:    addl %ebx, %edx
523; X86-NEXT:    adcl $0, %ecx
524; X86-NEXT:    shrdl $31, %edx, %eax
525; X86-NEXT:    movl %edx, %esi
526; X86-NEXT:    shrl $31, %esi
527; X86-NEXT:    xorl %edi, %edi
528; X86-NEXT:    cmpl $1, %esi
529; X86-NEXT:    sbbl %edi, %edi
530; X86-NEXT:    notl %edi
531; X86-NEXT:    orl %edi, %eax
532; X86-NEXT:    shldl $1, %edx, %ecx
533; X86-NEXT:    orl %edi, %ecx
534; X86-NEXT:    movl %ecx, %edx
535; X86-NEXT:    popl %esi
536; X86-NEXT:    popl %edi
537; X86-NEXT:    popl %ebx
538; X86-NEXT:    popl %ebp
539; X86-NEXT:    retl
540  %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 63)
541  ret i64 %tmp
542}
543