1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
3; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,LINUX,FAST
4; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -mcpu=knl < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
5; RUN: llc -disable-peephole -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefixes=CHECK,WIN64
6; RUN: llc -disable-peephole -mtriple=i386-pc-win32 < %s | FileCheck %s --check-prefix=WIN32
7
; Unsigned i64 multiply-with-overflow of the two constants 9 and 8.
; The expected output contains no multiply instruction: 72 is materialized
; directly and the remaining result register(s) are zeroed with xor.
8define {i64, i1} @t1() nounwind {
9; CHECK-LABEL: t1:
10; CHECK:       # %bb.0:
11; CHECK-NEXT:    movl $72, %eax
12; CHECK-NEXT:    xorl %edx, %edx
13; CHECK-NEXT:    retq
14;
15; WIN32-LABEL: t1:
16; WIN32:       # %bb.0:
17; WIN32-NEXT:    movl $72, %eax
18; WIN32-NEXT:    xorl %edx, %edx
19; WIN32-NEXT:    xorl %ecx, %ecx
20; WIN32-NEXT:    retl
; The whole {product, overflow} aggregate is returned to the caller.
21  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 8)
22  ret {i64, i1} %1
23}
24
; Unsigned i64 multiply-with-overflow of the constants 9 and 0.
; The expected output only zeroes the result registers; no multiply is
; emitted.
25define {i64, i1} @t2() nounwind {
26; CHECK-LABEL: t2:
27; CHECK:       # %bb.0:
28; CHECK-NEXT:    xorl %eax, %eax
29; CHECK-NEXT:    xorl %edx, %edx
30; CHECK-NEXT:    retq
31;
32; WIN32-LABEL: t2:
33; WIN32:       # %bb.0:
34; WIN32-NEXT:    xorl %eax, %eax
35; WIN32-NEXT:    xorl %edx, %edx
36; WIN32-NEXT:    xorl %ecx, %ecx
37; WIN32-NEXT:    retl
; The whole {product, overflow} aggregate is returned to the caller.
38  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 0)
39  ret {i64, i1} %1
40}
41
; Unsigned i64 multiply-with-overflow of the constants 9 and -1 (all bits set
; when read as unsigned). The expected output materializes the wrapped
; product (-9) and a set overflow byte (movb $1) without any multiply.
42define {i64, i1} @t3() nounwind {
43; CHECK-LABEL: t3:
44; CHECK:       # %bb.0:
45; CHECK-NEXT:    movq $-9, %rax
46; CHECK-NEXT:    movb $1, %dl
47; CHECK-NEXT:    retq
48;
49; WIN32-LABEL: t3:
50; WIN32:       # %bb.0:
51; WIN32-NEXT:    movl $-9, %eax
52; WIN32-NEXT:    movl $-1, %edx
53; WIN32-NEXT:    movb $1, %cl
54; WIN32-NEXT:    retl
; The whole {product, overflow} aggregate is returned to the caller.
55  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 -1)
56  ret {i64, i1} %1
57}
58
59; SMULO
; Signed i8 multiply-with-overflow: the product is stored through %res and
; the overflow bit is returned as a zero-extended i1.
; Every run is expected to use the one-operand imulb followed by seto; the
; fast-isel run additionally masks and zero-extends the flag (andb/movzbl).
60define zeroext i1 @smuloi8(i8 %v1, i8 %v2, i8* %res) {
61; SDAG-LABEL: smuloi8:
62; SDAG:       # %bb.0:
63; SDAG-NEXT:    movl %edi, %eax
64; SDAG-NEXT:    # kill: def $al killed $al killed $eax
65; SDAG-NEXT:    imulb %sil
66; SDAG-NEXT:    seto %cl
67; SDAG-NEXT:    movb %al, (%rdx)
68; SDAG-NEXT:    movl %ecx, %eax
69; SDAG-NEXT:    retq
70;
71; FAST-LABEL: smuloi8:
72; FAST:       # %bb.0:
73; FAST-NEXT:    movl %edi, %eax
74; FAST-NEXT:    # kill: def $al killed $al killed $eax
75; FAST-NEXT:    imulb %sil
76; FAST-NEXT:    seto %cl
77; FAST-NEXT:    movb %al, (%rdx)
78; FAST-NEXT:    andb $1, %cl
79; FAST-NEXT:    movzbl %cl, %eax
80; FAST-NEXT:    retq
81;
82; WIN64-LABEL: smuloi8:
83; WIN64:       # %bb.0:
84; WIN64-NEXT:    movl %ecx, %eax
85; WIN64-NEXT:    imulb %dl
86; WIN64-NEXT:    seto %cl
87; WIN64-NEXT:    movb %al, (%r8)
88; WIN64-NEXT:    movl %ecx, %eax
89; WIN64-NEXT:    retq
90;
91; WIN32-LABEL: smuloi8:
92; WIN32:       # %bb.0:
93; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
94; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
95; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
96; WIN32-NEXT:    seto %cl
97; WIN32-NEXT:    movb %al, (%edx)
98; WIN32-NEXT:    movl %ecx, %eax
99; WIN32-NEXT:    retl
; Element 0 of the intrinsic result is the product, element 1 the overflow bit.
100  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
101  %val = extractvalue {i8, i1} %t, 0
102  %obit = extractvalue {i8, i1} %t, 1
103  store i8 %val, i8* %res
104  ret i1 %obit
105}
106
; Signed i16 multiply-with-overflow: the product is stored through %res and
; the overflow bit is returned as a zero-extended i1.
; Every run is expected to use the two-operand imulw followed by seto; the
; fast-isel run additionally masks and zero-extends the flag (andb/movzbl).
107define zeroext i1 @smuloi16(i16 %v1, i16 %v2, i16* %res) {
108; SDAG-LABEL: smuloi16:
109; SDAG:       # %bb.0:
110; SDAG-NEXT:    imulw %si, %di
111; SDAG-NEXT:    seto %al
112; SDAG-NEXT:    movw %di, (%rdx)
113; SDAG-NEXT:    retq
114;
115; FAST-LABEL: smuloi16:
116; FAST:       # %bb.0:
117; FAST-NEXT:    imulw %si, %di
118; FAST-NEXT:    seto %al
119; FAST-NEXT:    movw %di, (%rdx)
120; FAST-NEXT:    andb $1, %al
121; FAST-NEXT:    movzbl %al, %eax
122; FAST-NEXT:    retq
123;
124; WIN64-LABEL: smuloi16:
125; WIN64:       # %bb.0:
126; WIN64-NEXT:    imulw %dx, %cx
127; WIN64-NEXT:    seto %al
128; WIN64-NEXT:    movw %cx, (%r8)
129; WIN64-NEXT:    retq
130;
131; WIN32-LABEL: smuloi16:
132; WIN32:       # %bb.0:
133; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
134; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
135; WIN32-NEXT:    imulw {{[0-9]+}}(%esp), %dx
136; WIN32-NEXT:    seto %al
137; WIN32-NEXT:    movw %dx, (%ecx)
138; WIN32-NEXT:    retl
; Element 0 of the intrinsic result is the product, element 1 the overflow bit.
139  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
140  %val = extractvalue {i16, i1} %t, 0
141  %obit = extractvalue {i16, i1} %t, 1
142  store i16 %val, i16* %res
143  ret i1 %obit
144}
145
; Signed i32 multiply-with-overflow: the product is stored through %res and
; the overflow bit is returned as a zero-extended i1.
; Every run is expected to use the two-operand imull followed by seto; the
; fast-isel run additionally masks and zero-extends the flag (andb/movzbl).
146define zeroext i1 @smuloi32(i32 %v1, i32 %v2, i32* %res) {
147; SDAG-LABEL: smuloi32:
148; SDAG:       # %bb.0:
149; SDAG-NEXT:    imull %esi, %edi
150; SDAG-NEXT:    seto %al
151; SDAG-NEXT:    movl %edi, (%rdx)
152; SDAG-NEXT:    retq
153;
154; FAST-LABEL: smuloi32:
155; FAST:       # %bb.0:
156; FAST-NEXT:    imull %esi, %edi
157; FAST-NEXT:    seto %al
158; FAST-NEXT:    movl %edi, (%rdx)
159; FAST-NEXT:    andb $1, %al
160; FAST-NEXT:    movzbl %al, %eax
161; FAST-NEXT:    retq
162;
163; WIN64-LABEL: smuloi32:
164; WIN64:       # %bb.0:
165; WIN64-NEXT:    imull %edx, %ecx
166; WIN64-NEXT:    seto %al
167; WIN64-NEXT:    movl %ecx, (%r8)
168; WIN64-NEXT:    retq
169;
170; WIN32-LABEL: smuloi32:
171; WIN32:       # %bb.0:
172; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
173; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
174; WIN32-NEXT:    imull {{[0-9]+}}(%esp), %edx
175; WIN32-NEXT:    seto %al
176; WIN32-NEXT:    movl %edx, (%ecx)
177; WIN32-NEXT:    retl
; Element 0 of the intrinsic result is the product, element 1 the overflow bit.
178  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
179  %val = extractvalue {i32, i1} %t, 0
180  %obit = extractvalue {i32, i1} %t, 1
181  store i32 %val, i32* %res
182  ret i1 %obit
183}
184
; Signed i64 multiply-with-overflow: the product is stored through %res and
; the overflow bit is returned as a zero-extended i1.
; The 64-bit runs are expected to use imulq followed by seto. The i386 run is
; expected to call ___mulodi4 instead, passing the address of a stack slot
; that is zeroed before the call and compared against zero afterwards to
; produce the overflow bit.
185define zeroext i1 @smuloi64(i64 %v1, i64 %v2, i64* %res) {
186; SDAG-LABEL: smuloi64:
187; SDAG:       # %bb.0:
188; SDAG-NEXT:    imulq %rsi, %rdi
189; SDAG-NEXT:    seto %al
190; SDAG-NEXT:    movq %rdi, (%rdx)
191; SDAG-NEXT:    retq
192;
193; FAST-LABEL: smuloi64:
194; FAST:       # %bb.0:
195; FAST-NEXT:    imulq %rsi, %rdi
196; FAST-NEXT:    seto %al
197; FAST-NEXT:    movq %rdi, (%rdx)
198; FAST-NEXT:    andb $1, %al
199; FAST-NEXT:    movzbl %al, %eax
200; FAST-NEXT:    retq
201;
202; WIN64-LABEL: smuloi64:
203; WIN64:       # %bb.0:
204; WIN64-NEXT:    imulq %rdx, %rcx
205; WIN64-NEXT:    seto %al
206; WIN64-NEXT:    movq %rcx, (%r8)
207; WIN64-NEXT:    retq
208;
209; WIN32-LABEL: smuloi64:
210; WIN32:       # %bb.0:
211; WIN32-NEXT:    pushl %ebx
212; WIN32-NEXT:    pushl %edi
213; WIN32-NEXT:    pushl %esi
214; WIN32-NEXT:    pushl %eax
215; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
216; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
217; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
218; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
219; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edi
220; WIN32-NEXT:    movl $0, (%esp)
221; WIN32-NEXT:    movl %esp, %ebx
222; WIN32-NEXT:    pushl %ebx
223; WIN32-NEXT:    pushl %edi
224; WIN32-NEXT:    pushl %edx
225; WIN32-NEXT:    pushl %ecx
226; WIN32-NEXT:    pushl %eax
227; WIN32-NEXT:    calll ___mulodi4
228; WIN32-NEXT:    addl $20, %esp
229; WIN32-NEXT:    cmpl $0, (%esp)
230; WIN32-NEXT:    setne %cl
231; WIN32-NEXT:    movl %edx, 4(%esi)
232; WIN32-NEXT:    movl %eax, (%esi)
233; WIN32-NEXT:    movl %ecx, %eax
234; WIN32-NEXT:    addl $4, %esp
235; WIN32-NEXT:    popl %esi
236; WIN32-NEXT:    popl %edi
237; WIN32-NEXT:    popl %ebx
238; WIN32-NEXT:    retl
; Element 0 of the intrinsic result is the product, element 1 the overflow bit.
239  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
240  %val = extractvalue {i64, i1} %t, 0
241  %obit = extractvalue {i64, i1} %t, 1
242  store i64 %val, i64* %res
243  ret i1 %obit
244}
245
246; UMULO
; Unsigned i8 multiply-with-overflow: the product is stored through %res and
; the overflow bit is returned as a zero-extended i1.
; Every run is expected to use the one-operand mulb followed by seto; the
; fast-isel run additionally masks and zero-extends the flag (andb/movzbl).
247define zeroext i1 @umuloi8(i8 %v1, i8 %v2, i8* %res) {
248; SDAG-LABEL: umuloi8:
249; SDAG:       # %bb.0:
250; SDAG-NEXT:    movl %edi, %eax
251; SDAG-NEXT:    # kill: def $al killed $al killed $eax
252; SDAG-NEXT:    mulb %sil
253; SDAG-NEXT:    seto %cl
254; SDAG-NEXT:    movb %al, (%rdx)
255; SDAG-NEXT:    movl %ecx, %eax
256; SDAG-NEXT:    retq
257;
258; FAST-LABEL: umuloi8:
259; FAST:       # %bb.0:
260; FAST-NEXT:    movl %edi, %eax
261; FAST-NEXT:    # kill: def $al killed $al killed $eax
262; FAST-NEXT:    mulb %sil
263; FAST-NEXT:    seto %cl
264; FAST-NEXT:    movb %al, (%rdx)
265; FAST-NEXT:    andb $1, %cl
266; FAST-NEXT:    movzbl %cl, %eax
267; FAST-NEXT:    retq
268;
269; WIN64-LABEL: umuloi8:
270; WIN64:       # %bb.0:
271; WIN64-NEXT:    movl %ecx, %eax
272; WIN64-NEXT:    mulb %dl
273; WIN64-NEXT:    seto %cl
274; WIN64-NEXT:    movb %al, (%r8)
275; WIN64-NEXT:    movl %ecx, %eax
276; WIN64-NEXT:    retq
277;
278; WIN32-LABEL: umuloi8:
279; WIN32:       # %bb.0:
280; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
281; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
282; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
283; WIN32-NEXT:    seto %cl
284; WIN32-NEXT:    movb %al, (%edx)
285; WIN32-NEXT:    movl %ecx, %eax
286; WIN32-NEXT:    retl
; Element 0 of the intrinsic result is the product, element 1 the overflow bit.
287  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
288  %val = extractvalue {i8, i1} %t, 0
289  %obit = extractvalue {i8, i1} %t, 1
290  store i8 %val, i8* %res
291  ret i1 %obit
292}
293
; Unsigned i16 multiply-with-overflow: the product is stored through %res and
; the overflow bit is returned as a zero-extended i1.
; Every run is expected to use the one-operand mulw followed by seto. In the
; Linux runs the %res pointer is first copied out of %rdx (presumably because
; one-operand mul also writes the dx register - confirm against the ISA
; manual), and the i386 run keeps the pointer in %esi.
294define zeroext i1 @umuloi16(i16 %v1, i16 %v2, i16* %res) {
295; SDAG-LABEL: umuloi16:
296; SDAG:       # %bb.0:
297; SDAG-NEXT:    movq %rdx, %rcx
298; SDAG-NEXT:    movl %edi, %eax
299; SDAG-NEXT:    # kill: def $ax killed $ax killed $eax
300; SDAG-NEXT:    mulw %si
301; SDAG-NEXT:    seto %dl
302; SDAG-NEXT:    movw %ax, (%rcx)
303; SDAG-NEXT:    movl %edx, %eax
304; SDAG-NEXT:    retq
305;
306; FAST-LABEL: umuloi16:
307; FAST:       # %bb.0:
308; FAST-NEXT:    movq %rdx, %rcx
309; FAST-NEXT:    movl %edi, %eax
310; FAST-NEXT:    # kill: def $ax killed $ax killed $eax
311; FAST-NEXT:    mulw %si
312; FAST-NEXT:    seto %dl
313; FAST-NEXT:    movw %ax, (%rcx)
314; FAST-NEXT:    andb $1, %dl
315; FAST-NEXT:    movzbl %dl, %eax
316; FAST-NEXT:    retq
317;
318; WIN64-LABEL: umuloi16:
319; WIN64:       # %bb.0:
320; WIN64-NEXT:    movl %ecx, %eax
321; WIN64-NEXT:    mulw %dx
322; WIN64-NEXT:    seto %cl
323; WIN64-NEXT:    movw %ax, (%r8)
324; WIN64-NEXT:    movl %ecx, %eax
325; WIN64-NEXT:    retq
326;
327; WIN32-LABEL: umuloi16:
328; WIN32:       # %bb.0:
329; WIN32-NEXT:    pushl %esi
330; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
331; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
332; WIN32-NEXT:    mulw {{[0-9]+}}(%esp)
333; WIN32-NEXT:    seto %cl
334; WIN32-NEXT:    movw %ax, (%esi)
335; WIN32-NEXT:    movl %ecx, %eax
336; WIN32-NEXT:    popl %esi
337; WIN32-NEXT:    retl
; Element 0 of the intrinsic result is the product, element 1 the overflow bit.
338  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
339  %val = extractvalue {i16, i1} %t, 0
340  %obit = extractvalue {i16, i1} %t, 1
341  store i16 %val, i16* %res
342  ret i1 %obit
343}
344
; Unsigned i32 multiply-with-overflow: the product is stored through %res and
; the overflow bit is returned as a zero-extended i1.
; Every run is expected to use the one-operand mull followed by seto; the
; fast-isel run additionally masks and zero-extends the flag (andb/movzbl).
345define zeroext i1 @umuloi32(i32 %v1, i32 %v2, i32* %res) {
346; SDAG-LABEL: umuloi32:
347; SDAG:       # %bb.0:
348; SDAG-NEXT:    movq %rdx, %rcx
349; SDAG-NEXT:    movl %edi, %eax
350; SDAG-NEXT:    mull %esi
351; SDAG-NEXT:    seto %dl
352; SDAG-NEXT:    movl %eax, (%rcx)
353; SDAG-NEXT:    movl %edx, %eax
354; SDAG-NEXT:    retq
355;
356; FAST-LABEL: umuloi32:
357; FAST:       # %bb.0:
358; FAST-NEXT:    movq %rdx, %rcx
359; FAST-NEXT:    movl %edi, %eax
360; FAST-NEXT:    mull %esi
361; FAST-NEXT:    seto %dl
362; FAST-NEXT:    movl %eax, (%rcx)
363; FAST-NEXT:    andb $1, %dl
364; FAST-NEXT:    movzbl %dl, %eax
365; FAST-NEXT:    retq
366;
367; WIN64-LABEL: umuloi32:
368; WIN64:       # %bb.0:
369; WIN64-NEXT:    movl %ecx, %eax
370; WIN64-NEXT:    mull %edx
371; WIN64-NEXT:    seto %cl
372; WIN64-NEXT:    movl %eax, (%r8)
373; WIN64-NEXT:    movl %ecx, %eax
374; WIN64-NEXT:    retq
375;
376; WIN32-LABEL: umuloi32:
377; WIN32:       # %bb.0:
378; WIN32-NEXT:    pushl %esi
379; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
380; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
381; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
382; WIN32-NEXT:    seto %cl
383; WIN32-NEXT:    movl %eax, (%esi)
384; WIN32-NEXT:    movl %ecx, %eax
385; WIN32-NEXT:    popl %esi
386; WIN32-NEXT:    retl
; Element 0 of the intrinsic result is the product, element 1 the overflow bit.
387  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
388  %val = extractvalue {i32, i1} %t, 0
389  %obit = extractvalue {i32, i1} %t, 1
390  store i32 %val, i32* %res
391  ret i1 %obit
392}
393
; Unsigned i64 multiply-with-overflow: the product is stored through %res and
; the overflow bit is returned as a zero-extended i1.
; The 64-bit runs are expected to use the one-operand mulq followed by seto.
; The i386 run is expected to expand the operation inline (no library call):
; three 32-bit mull instructions whose overflow/carry flags are OR-ed
; together with a "both high halves are non-zero" test.
394define zeroext i1 @umuloi64(i64 %v1, i64 %v2, i64* %res) {
395; SDAG-LABEL: umuloi64:
396; SDAG:       # %bb.0:
397; SDAG-NEXT:    movq %rdx, %rcx
398; SDAG-NEXT:    movq %rdi, %rax
399; SDAG-NEXT:    mulq %rsi
400; SDAG-NEXT:    seto %dl
401; SDAG-NEXT:    movq %rax, (%rcx)
402; SDAG-NEXT:    movl %edx, %eax
403; SDAG-NEXT:    retq
404;
405; FAST-LABEL: umuloi64:
406; FAST:       # %bb.0:
407; FAST-NEXT:    movq %rdx, %rcx
408; FAST-NEXT:    movq %rdi, %rax
409; FAST-NEXT:    mulq %rsi
410; FAST-NEXT:    seto %dl
411; FAST-NEXT:    movq %rax, (%rcx)
412; FAST-NEXT:    andb $1, %dl
413; FAST-NEXT:    movzbl %dl, %eax
414; FAST-NEXT:    retq
415;
416; WIN64-LABEL: umuloi64:
417; WIN64:       # %bb.0:
418; WIN64-NEXT:    movq %rcx, %rax
419; WIN64-NEXT:    mulq %rdx
420; WIN64-NEXT:    seto %cl
421; WIN64-NEXT:    movq %rax, (%r8)
422; WIN64-NEXT:    movl %ecx, %eax
423; WIN64-NEXT:    retq
424;
425; WIN32-LABEL: umuloi64:
426; WIN32:       # %bb.0:
427; WIN32-NEXT:    pushl %ebp
428; WIN32-NEXT:    pushl %ebx
429; WIN32-NEXT:    pushl %edi
430; WIN32-NEXT:    pushl %esi
431; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
432; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
433; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
434; WIN32-NEXT:    testl %esi, %esi
435; WIN32-NEXT:    setne %dl
436; WIN32-NEXT:    testl %eax, %eax
437; WIN32-NEXT:    setne %bl
438; WIN32-NEXT:    andb %dl, %bl
439; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
440; WIN32-NEXT:    movl %eax, %edi
441; WIN32-NEXT:    seto %cl
442; WIN32-NEXT:    movl %esi, %eax
443; WIN32-NEXT:    mull %ebp
444; WIN32-NEXT:    movl %eax, %esi
445; WIN32-NEXT:    seto %ch
446; WIN32-NEXT:    orb %cl, %ch
447; WIN32-NEXT:    addl %edi, %esi
448; WIN32-NEXT:    movl %ebp, %eax
449; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
450; WIN32-NEXT:    addl %esi, %edx
451; WIN32-NEXT:    setb %cl
452; WIN32-NEXT:    orb %ch, %cl
453; WIN32-NEXT:    orb %bl, %cl
454; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
455; WIN32-NEXT:    movl %eax, (%esi)
456; WIN32-NEXT:    movl %edx, 4(%esi)
457; WIN32-NEXT:    movl %ecx, %eax
458; WIN32-NEXT:    popl %esi
459; WIN32-NEXT:    popl %edi
460; WIN32-NEXT:    popl %ebx
461; WIN32-NEXT:    popl %ebp
462; WIN32-NEXT:    retl
; Element 0 of the intrinsic result is the product, element 1 the overflow bit.
463  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
464  %val = extractvalue {i64, i1} %t, 0
465  %obit = extractvalue {i64, i1} %t, 1
466  store i64 %val, i64* %res
467  ret i1 %obit
468}
469
470;
471; Check the use of the overflow bit in combination with a select instruction.
472;
; Signed i32 multiply whose overflow bit drives a select: returns %v1 when
; the multiply overflows and %v2 otherwise; the product itself is unused.
; The 64-bit runs are expected to consume the flag directly with cmovol,
; while the i386 run is expected to branch on it with jo.
473define i32 @smuloselecti32(i32 %v1, i32 %v2) {
474; LINUX-LABEL: smuloselecti32:
475; LINUX:       # %bb.0:
476; LINUX-NEXT:    movl %esi, %eax
477; LINUX-NEXT:    movl %edi, %ecx
478; LINUX-NEXT:    imull %esi, %ecx
479; LINUX-NEXT:    cmovol %edi, %eax
480; LINUX-NEXT:    retq
481;
482; WIN64-LABEL: smuloselecti32:
483; WIN64:       # %bb.0:
484; WIN64-NEXT:    movl %edx, %eax
485; WIN64-NEXT:    movl %ecx, %edx
486; WIN64-NEXT:    imull %eax, %edx
487; WIN64-NEXT:    cmovol %ecx, %eax
488; WIN64-NEXT:    retq
489;
490; WIN32-LABEL: smuloselecti32:
491; WIN32:       # %bb.0:
492; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
493; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
494; WIN32-NEXT:    movl %eax, %edx
495; WIN32-NEXT:    imull %ecx, %edx
496; WIN32-NEXT:    jo LBB11_2
497; WIN32-NEXT:  # %bb.1:
498; WIN32-NEXT:    movl %ecx, %eax
499; WIN32-NEXT:  LBB11_2:
500; WIN32-NEXT:    retl
; Only the overflow bit (element 1) of the intrinsic result is extracted.
501  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
502  %obit = extractvalue {i32, i1} %t, 1
503  %ret = select i1 %obit, i32 %v1, i32 %v2
504  ret i32 %ret
505}
506
; Signed i64 multiply whose overflow bit drives a select: returns %v1 when
; the multiply overflows and %v2 otherwise; the product itself is unused.
; The 64-bit runs are expected to use imulq plus cmovoq. The i386 run is
; expected to call ___mulodi4 with the address of a zeroed stack slot and
; then branch on whether that slot is non-zero.
507define i64 @smuloselecti64(i64 %v1, i64 %v2) {
508; LINUX-LABEL: smuloselecti64:
509; LINUX:       # %bb.0:
510; LINUX-NEXT:    movq %rsi, %rax
511; LINUX-NEXT:    movq %rdi, %rcx
512; LINUX-NEXT:    imulq %rsi, %rcx
513; LINUX-NEXT:    cmovoq %rdi, %rax
514; LINUX-NEXT:    retq
515;
516; WIN64-LABEL: smuloselecti64:
517; WIN64:       # %bb.0:
518; WIN64-NEXT:    movq %rdx, %rax
519; WIN64-NEXT:    movq %rcx, %rdx
520; WIN64-NEXT:    imulq %rax, %rdx
521; WIN64-NEXT:    cmovoq %rcx, %rax
522; WIN64-NEXT:    retq
523;
524; WIN32-LABEL: smuloselecti64:
525; WIN32:       # %bb.0:
526; WIN32-NEXT:    pushl %ebp
527; WIN32-NEXT:    pushl %ebx
528; WIN32-NEXT:    pushl %edi
529; WIN32-NEXT:    pushl %esi
530; WIN32-NEXT:    pushl %eax
531; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
532; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edi
533; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
534; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
535; WIN32-NEXT:    movl $0, (%esp)
536; WIN32-NEXT:    movl %esp, %eax
537; WIN32-NEXT:    pushl %eax
538; WIN32-NEXT:    pushl %ebp
539; WIN32-NEXT:    pushl %ebx
540; WIN32-NEXT:    pushl %edi
541; WIN32-NEXT:    pushl %esi
542; WIN32-NEXT:    calll ___mulodi4
543; WIN32-NEXT:    addl $20, %esp
544; WIN32-NEXT:    cmpl $0, (%esp)
545; WIN32-NEXT:    jne LBB12_2
546; WIN32-NEXT:  # %bb.1:
547; WIN32-NEXT:    movl %ebx, %esi
548; WIN32-NEXT:    movl %ebp, %edi
549; WIN32-NEXT:  LBB12_2:
550; WIN32-NEXT:    movl %esi, %eax
551; WIN32-NEXT:    movl %edi, %edx
552; WIN32-NEXT:    addl $4, %esp
553; WIN32-NEXT:    popl %esi
554; WIN32-NEXT:    popl %edi
555; WIN32-NEXT:    popl %ebx
556; WIN32-NEXT:    popl %ebp
557; WIN32-NEXT:    retl
; Only the overflow bit (element 1) of the intrinsic result is extracted.
558  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
559  %obit = extractvalue {i64, i1} %t, 1
560  %ret = select i1 %obit, i64 %v1, i64 %v2
561  ret i64 %ret
562}
563
; Unsigned i32 multiply whose overflow bit drives a select: returns %v1 when
; the multiply overflows and %v2 otherwise; the product itself is unused.
; The 64-bit runs are expected to use the one-operand mull plus cmovol,
; while the i386 run is expected to branch on the flag with jo.
564define i32 @umuloselecti32(i32 %v1, i32 %v2) {
565; LINUX-LABEL: umuloselecti32:
566; LINUX:       # %bb.0:
567; LINUX-NEXT:    movl %edi, %eax
568; LINUX-NEXT:    mull %esi
569; LINUX-NEXT:    cmovol %edi, %esi
570; LINUX-NEXT:    movl %esi, %eax
571; LINUX-NEXT:    retq
572;
573; WIN64-LABEL: umuloselecti32:
574; WIN64:       # %bb.0:
575; WIN64-NEXT:    movl %edx, %r8d
576; WIN64-NEXT:    movl %ecx, %eax
577; WIN64-NEXT:    mull %edx
578; WIN64-NEXT:    cmovol %ecx, %r8d
579; WIN64-NEXT:    movl %r8d, %eax
580; WIN64-NEXT:    retq
581;
582; WIN32-LABEL: umuloselecti32:
583; WIN32:       # %bb.0:
584; WIN32-NEXT:    pushl %esi
585; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
586; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
587; WIN32-NEXT:    movl %ecx, %eax
588; WIN32-NEXT:    mull %esi
589; WIN32-NEXT:    jo LBB13_2
590; WIN32-NEXT:  # %bb.1:
591; WIN32-NEXT:    movl %esi, %ecx
592; WIN32-NEXT:  LBB13_2:
593; WIN32-NEXT:    movl %ecx, %eax
594; WIN32-NEXT:    popl %esi
595; WIN32-NEXT:    retl
; Only the overflow bit (element 1) of the intrinsic result is extracted.
596  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
597  %obit = extractvalue {i32, i1} %t, 1
598  %ret = select i1 %obit, i32 %v1, i32 %v2
599  ret i32 %ret
600}
601
; Unsigned i64 multiply whose overflow bit drives a select: returns %v1 when
; the multiply overflows and %v2 otherwise; the product itself is unused.
; The 64-bit runs are expected to use the one-operand mulq plus cmovoq. The
; i386 run is expected to expand the multiply inline with three mull
; instructions; one intermediate overflow flag is spilled to and reloaded
; from a one-byte stack slot (matched with a regex, since the exact slot is
; not pinned) before the combined flag is tested and branched on.
602define i64 @umuloselecti64(i64 %v1, i64 %v2) {
603; LINUX-LABEL: umuloselecti64:
604; LINUX:       # %bb.0:
605; LINUX-NEXT:    movq %rdi, %rax
606; LINUX-NEXT:    mulq %rsi
607; LINUX-NEXT:    cmovoq %rdi, %rsi
608; LINUX-NEXT:    movq %rsi, %rax
609; LINUX-NEXT:    retq
610;
611; WIN64-LABEL: umuloselecti64:
612; WIN64:       # %bb.0:
613; WIN64-NEXT:    movq %rdx, %r8
614; WIN64-NEXT:    movq %rcx, %rax
615; WIN64-NEXT:    mulq %rdx
616; WIN64-NEXT:    cmovoq %rcx, %r8
617; WIN64-NEXT:    movq %r8, %rax
618; WIN64-NEXT:    retq
619;
620; WIN32-LABEL: umuloselecti64:
621; WIN32:       # %bb.0:
622; WIN32-NEXT:    pushl %ebp
623; WIN32-NEXT:    pushl %ebx
624; WIN32-NEXT:    pushl %edi
625; WIN32-NEXT:    pushl %esi
626; WIN32-NEXT:    pushl %eax
627; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
628; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
629; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
630; WIN32-NEXT:    testl %ebp, %ebp
631; WIN32-NEXT:    setne %al
632; WIN32-NEXT:    testl %esi, %esi
633; WIN32-NEXT:    setne %bl
634; WIN32-NEXT:    andb %al, %bl
635; WIN32-NEXT:    movl %esi, %eax
636; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
637; WIN32-NEXT:    movl %eax, %edi
638; WIN32-NEXT:    seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
639; WIN32-NEXT:    movl %ebp, %eax
640; WIN32-NEXT:    mull %ecx
641; WIN32-NEXT:    movl %eax, %ebp
642; WIN32-NEXT:    seto %bh
643; WIN32-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
644; WIN32-NEXT:    addl %edi, %ebp
645; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edi
646; WIN32-NEXT:    movl %ecx, %eax
647; WIN32-NEXT:    mull %edi
648; WIN32-NEXT:    addl %ebp, %edx
649; WIN32-NEXT:    setb %al
650; WIN32-NEXT:    orb %bh, %al
651; WIN32-NEXT:    orb %bl, %al
652; WIN32-NEXT:    testb %al, %al
653; WIN32-NEXT:    jne LBB14_2
654; WIN32-NEXT:  # %bb.1:
655; WIN32-NEXT:    movl %edi, %ecx
656; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
657; WIN32-NEXT:  LBB14_2:
658; WIN32-NEXT:    movl %ecx, %eax
659; WIN32-NEXT:    movl %esi, %edx
660; WIN32-NEXT:    addl $4, %esp
661; WIN32-NEXT:    popl %esi
662; WIN32-NEXT:    popl %edi
663; WIN32-NEXT:    popl %ebx
664; WIN32-NEXT:    popl %ebp
665; WIN32-NEXT:    retl
; Only the overflow bit (element 1) of the intrinsic result is extracted.
666  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
667  %obit = extractvalue {i64, i1} %t, 1
668  %ret = select i1 %obit, i64 %v1, i64 %v2
669  ret i64 %ret
670}
671
672;
673; Check the use of the overflow bit in combination with a branch instruction.
674;
; Signed i8 multiply whose overflow bit drives a conditional branch: returns
; false from the %overflow block and true from the %continue block.
; The SelectionDAG and Windows runs are expected to branch straight on the
; flag with jo; the fast-isel run is expected to materialize it first
; (seto, testb, jne).
675define zeroext i1 @smulobri8(i8 %v1, i8 %v2) {
676; SDAG-LABEL: smulobri8:
677; SDAG:       # %bb.0:
678; SDAG-NEXT:    movl %edi, %eax
679; SDAG-NEXT:    # kill: def $al killed $al killed $eax
680; SDAG-NEXT:    imulb %sil
681; SDAG-NEXT:    jo .LBB15_1
682; SDAG-NEXT:  # %bb.2: # %continue
683; SDAG-NEXT:    movb $1, %al
684; SDAG-NEXT:    retq
685; SDAG-NEXT:  .LBB15_1: # %overflow
686; SDAG-NEXT:    xorl %eax, %eax
687; SDAG-NEXT:    retq
688;
689; FAST-LABEL: smulobri8:
690; FAST:       # %bb.0:
691; FAST-NEXT:    movl %edi, %eax
692; FAST-NEXT:    # kill: def $al killed $al killed $eax
693; FAST-NEXT:    imulb %sil
694; FAST-NEXT:    seto %al
695; FAST-NEXT:    testb $1, %al
696; FAST-NEXT:    jne .LBB15_1
697; FAST-NEXT:  # %bb.2: # %continue
698; FAST-NEXT:    movb $1, %al
699; FAST-NEXT:    andb $1, %al
700; FAST-NEXT:    movzbl %al, %eax
701; FAST-NEXT:    retq
702; FAST-NEXT:  .LBB15_1: # %overflow
703; FAST-NEXT:    xorl %eax, %eax
704; FAST-NEXT:    andb $1, %al
705; FAST-NEXT:    movzbl %al, %eax
706; FAST-NEXT:    retq
707;
708; WIN64-LABEL: smulobri8:
709; WIN64:       # %bb.0:
710; WIN64-NEXT:    movl %ecx, %eax
711; WIN64-NEXT:    imulb %dl
712; WIN64-NEXT:    jo .LBB15_1
713; WIN64-NEXT:  # %bb.2: # %continue
714; WIN64-NEXT:    movb $1, %al
715; WIN64-NEXT:    retq
716; WIN64-NEXT:  .LBB15_1: # %overflow
717; WIN64-NEXT:    xorl %eax, %eax
718; WIN64-NEXT:    retq
719;
720; WIN32-LABEL: smulobri8:
721; WIN32:       # %bb.0:
722; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
723; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
724; WIN32-NEXT:    jo LBB15_1
725; WIN32-NEXT:  # %bb.2: # %continue
726; WIN32-NEXT:    movb $1, %al
727; WIN32-NEXT:    retl
728; WIN32-NEXT:  LBB15_1: # %overflow
729; WIN32-NEXT:    xorl %eax, %eax
730; WIN32-NEXT:    retl
; %val is extracted but never used; the branch carries !prof !0 metadata,
; whose definition is not part of this function.
731  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
732  %val = extractvalue {i8, i1} %t, 0
733  %obit = extractvalue {i8, i1} %t, 1
734  br i1 %obit, label %overflow, label %continue, !prof !0
735
736overflow:
737  ret i1 false
738
739continue:
740  ret i1 true
741}
742
; Signed i16 multiply whose overflow bit drives a conditional branch: returns
; false from the %overflow block and true from the %continue block.
; The SelectionDAG and Windows runs are expected to branch straight on the
; flag with jo; the fast-isel run is expected to materialize it first
; (seto, testb, jne).
743define zeroext i1 @smulobri16(i16 %v1, i16 %v2) {
744; SDAG-LABEL: smulobri16:
745; SDAG:       # %bb.0:
746; SDAG-NEXT:    imulw %si, %di
747; SDAG-NEXT:    jo .LBB16_1
748; SDAG-NEXT:  # %bb.2: # %continue
749; SDAG-NEXT:    movb $1, %al
750; SDAG-NEXT:    retq
751; SDAG-NEXT:  .LBB16_1: # %overflow
752; SDAG-NEXT:    xorl %eax, %eax
753; SDAG-NEXT:    retq
754;
755; FAST-LABEL: smulobri16:
756; FAST:       # %bb.0:
757; FAST-NEXT:    imulw %si, %di
758; FAST-NEXT:    seto %al
759; FAST-NEXT:    testb $1, %al
760; FAST-NEXT:    jne .LBB16_1
761; FAST-NEXT:  # %bb.2: # %continue
762; FAST-NEXT:    movb $1, %al
763; FAST-NEXT:    andb $1, %al
764; FAST-NEXT:    movzbl %al, %eax
765; FAST-NEXT:    retq
766; FAST-NEXT:  .LBB16_1: # %overflow
767; FAST-NEXT:    xorl %eax, %eax
768; FAST-NEXT:    andb $1, %al
769; FAST-NEXT:    movzbl %al, %eax
770; FAST-NEXT:    retq
771;
772; WIN64-LABEL: smulobri16:
773; WIN64:       # %bb.0:
774; WIN64-NEXT:    imulw %dx, %cx
775; WIN64-NEXT:    jo .LBB16_1
776; WIN64-NEXT:  # %bb.2: # %continue
777; WIN64-NEXT:    movb $1, %al
778; WIN64-NEXT:    retq
779; WIN64-NEXT:  .LBB16_1: # %overflow
780; WIN64-NEXT:    xorl %eax, %eax
781; WIN64-NEXT:    retq
782;
783; WIN32-LABEL: smulobri16:
784; WIN32:       # %bb.0:
785; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
786; WIN32-NEXT:    imulw {{[0-9]+}}(%esp), %ax
787; WIN32-NEXT:    jo LBB16_1
788; WIN32-NEXT:  # %bb.2: # %continue
789; WIN32-NEXT:    movb $1, %al
790; WIN32-NEXT:    retl
791; WIN32-NEXT:  LBB16_1: # %overflow
792; WIN32-NEXT:    xorl %eax, %eax
793; WIN32-NEXT:    retl
; %val is extracted but never used; the branch carries !prof !0 metadata,
; whose definition is not part of this function.
794  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
795  %val = extractvalue {i16, i1} %t, 0
796  %obit = extractvalue {i16, i1} %t, 1
797  br i1 %obit, label %overflow, label %continue, !prof !0
798
799overflow:
800  ret i1 false
801
802continue:
803  ret i1 true
804}
805
; Signed i32 multiply whose overflow bit drives a conditional branch: returns
; false from the %overflow block and true from the %continue block.
; All runs, including fast-isel, are expected to branch straight on the flag
; with jo after imull; fast-isel differs only in masking and zero-extending
; the returned constant.
806define zeroext i1 @smulobri32(i32 %v1, i32 %v2) {
807; SDAG-LABEL: smulobri32:
808; SDAG:       # %bb.0:
809; SDAG-NEXT:    imull %esi, %edi
810; SDAG-NEXT:    jo .LBB17_1
811; SDAG-NEXT:  # %bb.2: # %continue
812; SDAG-NEXT:    movb $1, %al
813; SDAG-NEXT:    retq
814; SDAG-NEXT:  .LBB17_1: # %overflow
815; SDAG-NEXT:    xorl %eax, %eax
816; SDAG-NEXT:    retq
817;
818; FAST-LABEL: smulobri32:
819; FAST:       # %bb.0:
820; FAST-NEXT:    imull %esi, %edi
821; FAST-NEXT:    jo .LBB17_1
822; FAST-NEXT:  # %bb.2: # %continue
823; FAST-NEXT:    movb $1, %al
824; FAST-NEXT:    andb $1, %al
825; FAST-NEXT:    movzbl %al, %eax
826; FAST-NEXT:    retq
827; FAST-NEXT:  .LBB17_1: # %overflow
828; FAST-NEXT:    xorl %eax, %eax
829; FAST-NEXT:    andb $1, %al
830; FAST-NEXT:    movzbl %al, %eax
831; FAST-NEXT:    retq
832;
833; WIN64-LABEL: smulobri32:
834; WIN64:       # %bb.0:
835; WIN64-NEXT:    imull %edx, %ecx
836; WIN64-NEXT:    jo .LBB17_1
837; WIN64-NEXT:  # %bb.2: # %continue
838; WIN64-NEXT:    movb $1, %al
839; WIN64-NEXT:    retq
840; WIN64-NEXT:  .LBB17_1: # %overflow
841; WIN64-NEXT:    xorl %eax, %eax
842; WIN64-NEXT:    retq
843;
844; WIN32-LABEL: smulobri32:
845; WIN32:       # %bb.0:
846; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
847; WIN32-NEXT:    imull {{[0-9]+}}(%esp), %eax
848; WIN32-NEXT:    jo LBB17_1
849; WIN32-NEXT:  # %bb.2: # %continue
850; WIN32-NEXT:    movb $1, %al
851; WIN32-NEXT:    retl
852; WIN32-NEXT:  LBB17_1: # %overflow
853; WIN32-NEXT:    xorl %eax, %eax
854; WIN32-NEXT:    retl
; %val is extracted but never used; the branch carries !prof !0 metadata,
; whose definition is not part of this function.
855  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
856  %val = extractvalue {i32, i1} %t, 0
857  %obit = extractvalue {i32, i1} %t, 1
858  br i1 %obit, label %overflow, label %continue, !prof !0
859
860overflow:
861  ret i1 false
862
863continue:
864  ret i1 true
865}
866
; Signed i64 multiply whose overflow bit drives a conditional branch: returns
; false from the %overflow block and true from the %continue block.
; The 64-bit runs are expected to branch straight on the flag with jo after
; imulq. The i386 run is expected to call ___mulodi4 with the address of a
; zeroed stack slot and branch on whether that slot is non-zero; both return
; paths share a single epilogue there.
867define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
868; SDAG-LABEL: smulobri64:
869; SDAG:       # %bb.0:
870; SDAG-NEXT:    imulq %rsi, %rdi
871; SDAG-NEXT:    jo .LBB18_1
872; SDAG-NEXT:  # %bb.2: # %continue
873; SDAG-NEXT:    movb $1, %al
874; SDAG-NEXT:    retq
875; SDAG-NEXT:  .LBB18_1: # %overflow
876; SDAG-NEXT:    xorl %eax, %eax
877; SDAG-NEXT:    retq
878;
879; FAST-LABEL: smulobri64:
880; FAST:       # %bb.0:
881; FAST-NEXT:    imulq %rsi, %rdi
882; FAST-NEXT:    jo .LBB18_1
883; FAST-NEXT:  # %bb.2: # %continue
884; FAST-NEXT:    movb $1, %al
885; FAST-NEXT:    andb $1, %al
886; FAST-NEXT:    movzbl %al, %eax
887; FAST-NEXT:    retq
888; FAST-NEXT:  .LBB18_1: # %overflow
889; FAST-NEXT:    xorl %eax, %eax
890; FAST-NEXT:    andb $1, %al
891; FAST-NEXT:    movzbl %al, %eax
892; FAST-NEXT:    retq
893;
894; WIN64-LABEL: smulobri64:
895; WIN64:       # %bb.0:
896; WIN64-NEXT:    imulq %rdx, %rcx
897; WIN64-NEXT:    jo .LBB18_1
898; WIN64-NEXT:  # %bb.2: # %continue
899; WIN64-NEXT:    movb $1, %al
900; WIN64-NEXT:    retq
901; WIN64-NEXT:  .LBB18_1: # %overflow
902; WIN64-NEXT:    xorl %eax, %eax
903; WIN64-NEXT:    retq
904;
905; WIN32-LABEL: smulobri64:
906; WIN32:       # %bb.0:
907; WIN32-NEXT:    pushl %edi
908; WIN32-NEXT:    pushl %esi
909; WIN32-NEXT:    pushl %eax
910; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
911; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
912; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
913; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
914; WIN32-NEXT:    movl $0, (%esp)
915; WIN32-NEXT:    movl %esp, %edi
916; WIN32-NEXT:    pushl %edi
917; WIN32-NEXT:    pushl %esi
918; WIN32-NEXT:    pushl %edx
919; WIN32-NEXT:    pushl %ecx
920; WIN32-NEXT:    pushl %eax
921; WIN32-NEXT:    calll ___mulodi4
922; WIN32-NEXT:    addl $20, %esp
923; WIN32-NEXT:    cmpl $0, (%esp)
924; WIN32-NEXT:    jne LBB18_1
925; WIN32-NEXT:  # %bb.3: # %continue
926; WIN32-NEXT:    movb $1, %al
927; WIN32-NEXT:  LBB18_2: # %overflow
928; WIN32-NEXT:    addl $4, %esp
929; WIN32-NEXT:    popl %esi
930; WIN32-NEXT:    popl %edi
931; WIN32-NEXT:    retl
932; WIN32-NEXT:  LBB18_1: # %overflow
933; WIN32-NEXT:    xorl %eax, %eax
934; WIN32-NEXT:    jmp LBB18_2
; %val is extracted but never used; the branch carries !prof !0 metadata,
; whose definition is not part of this function.
935  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
936  %val = extractvalue {i64, i1} %t, 0
937  %obit = extractvalue {i64, i1} %t, 1
938  br i1 %obit, label %overflow, label %continue, !prof !0
939
940overflow:
941  ret i1 false
942
943continue:
944  ret i1 true
945}
946
; Unsigned i8 multiply whose overflow bit drives a conditional branch:
; returns false from the %overflow block and true from the %continue block.
; The SelectionDAG and Windows runs are expected to branch straight on the
; flag with jo after mulb; the fast-isel run is expected to materialize it
; first (seto, testb, jne).
947define zeroext i1 @umulobri8(i8 %v1, i8 %v2) {
948; SDAG-LABEL: umulobri8:
949; SDAG:       # %bb.0:
950; SDAG-NEXT:    movl %edi, %eax
951; SDAG-NEXT:    # kill: def $al killed $al killed $eax
952; SDAG-NEXT:    mulb %sil
953; SDAG-NEXT:    jo .LBB19_1
954; SDAG-NEXT:  # %bb.2: # %continue
955; SDAG-NEXT:    movb $1, %al
956; SDAG-NEXT:    retq
957; SDAG-NEXT:  .LBB19_1: # %overflow
958; SDAG-NEXT:    xorl %eax, %eax
959; SDAG-NEXT:    retq
960;
961; FAST-LABEL: umulobri8:
962; FAST:       # %bb.0:
963; FAST-NEXT:    movl %edi, %eax
964; FAST-NEXT:    # kill: def $al killed $al killed $eax
965; FAST-NEXT:    mulb %sil
966; FAST-NEXT:    seto %al
967; FAST-NEXT:    testb $1, %al
968; FAST-NEXT:    jne .LBB19_1
969; FAST-NEXT:  # %bb.2: # %continue
970; FAST-NEXT:    movb $1, %al
971; FAST-NEXT:    andb $1, %al
972; FAST-NEXT:    movzbl %al, %eax
973; FAST-NEXT:    retq
974; FAST-NEXT:  .LBB19_1: # %overflow
975; FAST-NEXT:    xorl %eax, %eax
976; FAST-NEXT:    andb $1, %al
977; FAST-NEXT:    movzbl %al, %eax
978; FAST-NEXT:    retq
979;
980; WIN64-LABEL: umulobri8:
981; WIN64:       # %bb.0:
982; WIN64-NEXT:    movl %ecx, %eax
983; WIN64-NEXT:    mulb %dl
984; WIN64-NEXT:    jo .LBB19_1
985; WIN64-NEXT:  # %bb.2: # %continue
986; WIN64-NEXT:    movb $1, %al
987; WIN64-NEXT:    retq
988; WIN64-NEXT:  .LBB19_1: # %overflow
989; WIN64-NEXT:    xorl %eax, %eax
990; WIN64-NEXT:    retq
991;
992; WIN32-LABEL: umulobri8:
993; WIN32:       # %bb.0:
994; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
995; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
996; WIN32-NEXT:    jo LBB19_1
997; WIN32-NEXT:  # %bb.2: # %continue
998; WIN32-NEXT:    movb $1, %al
999; WIN32-NEXT:    retl
1000; WIN32-NEXT:  LBB19_1: # %overflow
1001; WIN32-NEXT:    xorl %eax, %eax
1002; WIN32-NEXT:    retl
; %val is extracted but never used; the branch carries !prof !0 metadata,
; whose definition is not part of this function.
1003  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
1004  %val = extractvalue {i8, i1} %t, 0
1005  %obit = extractvalue {i8, i1} %t, 1
1006  br i1 %obit, label %overflow, label %continue, !prof !0
1007
1008overflow:
1009  ret i1 false
1010
1011continue:
1012  ret i1 true
1013}
1014
; umulobri16: branch on the overflow flag of an unsigned i16 multiply.
; The IR multiplies %v1 by %v2 with llvm.umul.with.overflow.i16 and branches on
; the i1 field of the result: %overflow returns false, %continue returns true.
; Expected code: the SDAG, WIN64 and WIN32 check groups want a `jo` directly
; after the `mulw`; the FAST group instead wants the flag materialized with
; `seto` and then re-tested with `testb $1` / `jne`.
1015define zeroext i1 @umulobri16(i16 %v1, i16 %v2) {
1016; SDAG-LABEL: umulobri16:
1017; SDAG:       # %bb.0:
1018; SDAG-NEXT:    movl %edi, %eax
1019; SDAG-NEXT:    # kill: def $ax killed $ax killed $eax
1020; SDAG-NEXT:    mulw %si
1021; SDAG-NEXT:    jo .LBB20_1
1022; SDAG-NEXT:  # %bb.2: # %continue
1023; SDAG-NEXT:    movb $1, %al
1024; SDAG-NEXT:    retq
1025; SDAG-NEXT:  .LBB20_1: # %overflow
1026; SDAG-NEXT:    xorl %eax, %eax
1027; SDAG-NEXT:    retq
1028;
1029; FAST-LABEL: umulobri16:
1030; FAST:       # %bb.0:
1031; FAST-NEXT:    movl %edi, %eax
1032; FAST-NEXT:    # kill: def $ax killed $ax killed $eax
1033; FAST-NEXT:    mulw %si
1034; FAST-NEXT:    seto %al
1035; FAST-NEXT:    testb $1, %al
1036; FAST-NEXT:    jne .LBB20_1
1037; FAST-NEXT:  # %bb.2: # %continue
1038; FAST-NEXT:    movb $1, %al
1039; FAST-NEXT:    andb $1, %al
1040; FAST-NEXT:    movzbl %al, %eax
1041; FAST-NEXT:    retq
1042; FAST-NEXT:  .LBB20_1: # %overflow
1043; FAST-NEXT:    xorl %eax, %eax
1044; FAST-NEXT:    andb $1, %al
1045; FAST-NEXT:    movzbl %al, %eax
1046; FAST-NEXT:    retq
1047;
1048; WIN64-LABEL: umulobri16:
1049; WIN64:       # %bb.0:
1050; WIN64-NEXT:    movl %ecx, %eax
1051; WIN64-NEXT:    mulw %dx
1052; WIN64-NEXT:    jo .LBB20_1
1053; WIN64-NEXT:  # %bb.2: # %continue
1054; WIN64-NEXT:    movb $1, %al
1055; WIN64-NEXT:    retq
1056; WIN64-NEXT:  .LBB20_1: # %overflow
1057; WIN64-NEXT:    xorl %eax, %eax
1058; WIN64-NEXT:    retq
1059;
1060; WIN32-LABEL: umulobri16:
1061; WIN32:       # %bb.0:
1062; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
1063; WIN32-NEXT:    mulw {{[0-9]+}}(%esp)
1064; WIN32-NEXT:    jo LBB20_1
1065; WIN32-NEXT:  # %bb.2: # %continue
1066; WIN32-NEXT:    movb $1, %al
1067; WIN32-NEXT:    retl
1068; WIN32-NEXT:  LBB20_1: # %overflow
1069; WIN32-NEXT:    xorl %eax, %eax
1070; WIN32-NEXT:    retl
1071  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
; Field 0 is the product (extracted here but never used below); field 1 is
; the overflow flag that drives the branch.
1072  %val = extractvalue {i16, i1} %t, 0
1073  %obit = extractvalue {i16, i1} %t, 1
; NOTE(review): !prof !0 is defined outside this excerpt; presumably branch
; weights -- confirm against the metadata at the end of the file.
1074  br i1 %obit, label %overflow, label %continue, !prof !0
1075
1076overflow:
1077  ret i1 false
1078
1079continue:
1080  ret i1 true
1081}
1082
; umulobri32: branch on the overflow flag of an unsigned i32 multiply.
; The IR multiplies %v1 by %v2 with llvm.umul.with.overflow.i32 and branches on
; the i1 field of the result: %overflow returns false, %continue returns true.
; Expected code: all four check groups (SDAG, FAST, WIN64, WIN32) want a `jo`
; directly after the `mull`. The FAST group additionally expects each returned
; constant to be masked with `andb $1` and widened with `movzbl`.
1083define zeroext i1 @umulobri32(i32 %v1, i32 %v2) {
1084; SDAG-LABEL: umulobri32:
1085; SDAG:       # %bb.0:
1086; SDAG-NEXT:    movl %edi, %eax
1087; SDAG-NEXT:    mull %esi
1088; SDAG-NEXT:    jo .LBB21_1
1089; SDAG-NEXT:  # %bb.2: # %continue
1090; SDAG-NEXT:    movb $1, %al
1091; SDAG-NEXT:    retq
1092; SDAG-NEXT:  .LBB21_1: # %overflow
1093; SDAG-NEXT:    xorl %eax, %eax
1094; SDAG-NEXT:    retq
1095;
1096; FAST-LABEL: umulobri32:
1097; FAST:       # %bb.0:
1098; FAST-NEXT:    movl %edi, %eax
1099; FAST-NEXT:    mull %esi
1100; FAST-NEXT:    jo .LBB21_1
1101; FAST-NEXT:  # %bb.2: # %continue
1102; FAST-NEXT:    movb $1, %al
1103; FAST-NEXT:    andb $1, %al
1104; FAST-NEXT:    movzbl %al, %eax
1105; FAST-NEXT:    retq
1106; FAST-NEXT:  .LBB21_1: # %overflow
1107; FAST-NEXT:    xorl %eax, %eax
1108; FAST-NEXT:    andb $1, %al
1109; FAST-NEXT:    movzbl %al, %eax
1110; FAST-NEXT:    retq
1111;
1112; WIN64-LABEL: umulobri32:
1113; WIN64:       # %bb.0:
1114; WIN64-NEXT:    movl %ecx, %eax
1115; WIN64-NEXT:    mull %edx
1116; WIN64-NEXT:    jo .LBB21_1
1117; WIN64-NEXT:  # %bb.2: # %continue
1118; WIN64-NEXT:    movb $1, %al
1119; WIN64-NEXT:    retq
1120; WIN64-NEXT:  .LBB21_1: # %overflow
1121; WIN64-NEXT:    xorl %eax, %eax
1122; WIN64-NEXT:    retq
1123;
1124; WIN32-LABEL: umulobri32:
1125; WIN32:       # %bb.0:
1126; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1127; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1128; WIN32-NEXT:    jo LBB21_1
1129; WIN32-NEXT:  # %bb.2: # %continue
1130; WIN32-NEXT:    movb $1, %al
1131; WIN32-NEXT:    retl
1132; WIN32-NEXT:  LBB21_1: # %overflow
1133; WIN32-NEXT:    xorl %eax, %eax
1134; WIN32-NEXT:    retl
1135  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
; Field 0 is the product (extracted here but never used below); field 1 is
; the overflow flag that drives the branch.
1136  %val = extractvalue {i32, i1} %t, 0
1137  %obit = extractvalue {i32, i1} %t, 1
; NOTE(review): !prof !0 is defined outside this excerpt; presumably branch
; weights -- confirm against the metadata at the end of the file.
1138  br i1 %obit, label %overflow, label %continue, !prof !0
1139
1140overflow:
1141  ret i1 false
1142
1143continue:
1144  ret i1 true
1145}
1146
; umulobri64: branch on the overflow flag of an unsigned i64 multiply.
; The IR multiplies %v1 by %v2 with llvm.umul.with.overflow.i64 and branches on
; the i1 field of the result: %overflow returns false, %continue returns true.
; Expected code: the SDAG, FAST and WIN64 check groups want a single `mulq`
; followed directly by `jo`. The WIN32 group (i386 triple, see the RUN lines)
; expects no library call: it lists three 32-bit `mull` instructions and ORs
; their `seto`/`setb` results together with the AND of two `setne` tests
; before branching.
1147define zeroext i1 @umulobri64(i64 %v1, i64 %v2) {
1148; SDAG-LABEL: umulobri64:
1149; SDAG:       # %bb.0:
1150; SDAG-NEXT:    movq %rdi, %rax
1151; SDAG-NEXT:    mulq %rsi
1152; SDAG-NEXT:    jo .LBB22_1
1153; SDAG-NEXT:  # %bb.2: # %continue
1154; SDAG-NEXT:    movb $1, %al
1155; SDAG-NEXT:    retq
1156; SDAG-NEXT:  .LBB22_1: # %overflow
1157; SDAG-NEXT:    xorl %eax, %eax
1158; SDAG-NEXT:    retq
1159;
1160; FAST-LABEL: umulobri64:
1161; FAST:       # %bb.0:
1162; FAST-NEXT:    movq %rdi, %rax
1163; FAST-NEXT:    mulq %rsi
1164; FAST-NEXT:    jo .LBB22_1
1165; FAST-NEXT:  # %bb.2: # %continue
1166; FAST-NEXT:    movb $1, %al
1167; FAST-NEXT:    andb $1, %al
1168; FAST-NEXT:    movzbl %al, %eax
1169; FAST-NEXT:    retq
1170; FAST-NEXT:  .LBB22_1: # %overflow
1171; FAST-NEXT:    xorl %eax, %eax
1172; FAST-NEXT:    andb $1, %al
1173; FAST-NEXT:    movzbl %al, %eax
1174; FAST-NEXT:    retq
1175;
1176; WIN64-LABEL: umulobri64:
1177; WIN64:       # %bb.0:
1178; WIN64-NEXT:    movq %rcx, %rax
1179; WIN64-NEXT:    mulq %rdx
1180; WIN64-NEXT:    jo .LBB22_1
1181; WIN64-NEXT:  # %bb.2: # %continue
1182; WIN64-NEXT:    movb $1, %al
1183; WIN64-NEXT:    retq
1184; WIN64-NEXT:  .LBB22_1: # %overflow
1185; WIN64-NEXT:    xorl %eax, %eax
1186; WIN64-NEXT:    retq
1187;
1188; WIN32-LABEL: umulobri64:
1189; WIN32:       # %bb.0:
1190; WIN32-NEXT:    pushl %ebp
1191; WIN32-NEXT:    pushl %ebx
1192; WIN32-NEXT:    pushl %edi
1193; WIN32-NEXT:    pushl %esi
1194; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
1195; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1196; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
1197; WIN32-NEXT:    testl %esi, %esi
1198; WIN32-NEXT:    setne %dl
1199; WIN32-NEXT:    testl %eax, %eax
1200; WIN32-NEXT:    setne %bl
1201; WIN32-NEXT:    andb %dl, %bl
1202; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1203; WIN32-NEXT:    movl %eax, %edi
1204; WIN32-NEXT:    seto %bh
1205; WIN32-NEXT:    movl %esi, %eax
1206; WIN32-NEXT:    mull %ebp
1207; WIN32-NEXT:    movl %eax, %esi
1208; WIN32-NEXT:    seto %cl
1209; WIN32-NEXT:    orb %bh, %cl
1210; WIN32-NEXT:    addl %edi, %esi
1211; WIN32-NEXT:    movl %ebp, %eax
1212; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1213; WIN32-NEXT:    addl %esi, %edx
1214; WIN32-NEXT:    setb %al
1215; WIN32-NEXT:    orb %cl, %al
1216; WIN32-NEXT:    orb %bl, %al
1217; WIN32-NEXT:    subb $1, %al
1218; WIN32-NEXT:    je LBB22_1
1219; WIN32-NEXT:  # %bb.3: # %continue
1220; WIN32-NEXT:    movb $1, %al
1221; WIN32-NEXT:  LBB22_2: # %overflow
1222; WIN32-NEXT:    popl %esi
1223; WIN32-NEXT:    popl %edi
1224; WIN32-NEXT:    popl %ebx
1225; WIN32-NEXT:    popl %ebp
1226; WIN32-NEXT:    retl
1227; WIN32-NEXT:  LBB22_1: # %overflow
1228; WIN32-NEXT:    xorl %eax, %eax
1229; WIN32-NEXT:    jmp LBB22_2
1230  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
; Field 0 is the product (extracted here but never used below); field 1 is
; the overflow flag that drives the branch.
1231  %val = extractvalue {i64, i1} %t, 0
1232  %obit = extractvalue {i64, i1} %t, 1
; NOTE(review): !prof !0 is defined outside this excerpt; presumably branch
; weights -- confirm against the metadata at the end of the file.
1233  br i1 %obit, label %overflow, label %continue, !prof !0
1234
1235overflow:
1236  ret i1 false
1237
1238continue:
1239  ret i1 true
1240}
1241
; bug27873: OR an incoming i1 with the overflow flag of an i64 multiply by 160.
; The IR multiplies %c1 by the constant 160 with llvm.umul.with.overflow.i64,
; keeps only the overflow field, ORs it with %c2 and returns the result.
; NOTE(review): the name suggests a regression test for a bug numbered 27873;
; the report itself is not visible from this file -- confirm before citing it.
; Expected code: the LINUX prefix is shared by every x86_64-linux RUN line, so
; SelectionDAG and fast-isel must produce the same sequence here: `mulq` by a
; register holding 160, then `seto` and `orb` with the incoming flag. The
; WIN32 group expects two `mull` instructions by 160 plus `seto`/`setb`
; combining instead.
1242define i1 @bug27873(i64 %c1, i1 %c2) {
1243; LINUX-LABEL: bug27873:
1244; LINUX:       # %bb.0:
1245; LINUX-NEXT:    movq %rdi, %rax
1246; LINUX-NEXT:    movl $160, %ecx
1247; LINUX-NEXT:    mulq %rcx
1248; LINUX-NEXT:    seto %al
1249; LINUX-NEXT:    orb %sil, %al
1250; LINUX-NEXT:    retq
1251;
1252; WIN64-LABEL: bug27873:
1253; WIN64:       # %bb.0:
1254; WIN64-NEXT:    movl %edx, %r8d
1255; WIN64-NEXT:    movq %rcx, %rax
1256; WIN64-NEXT:    movl $160, %ecx
1257; WIN64-NEXT:    mulq %rcx
1258; WIN64-NEXT:    seto %al
1259; WIN64-NEXT:    orb %r8b, %al
1260; WIN64-NEXT:    retq
1261;
1262; WIN32-LABEL: bug27873:
1263; WIN32:       # %bb.0:
1264; WIN32-NEXT:    pushl %ebx
1265; WIN32-NEXT:    movl $160, %eax
1266; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1267; WIN32-NEXT:    movl %eax, %ecx
1268; WIN32-NEXT:    seto %bl
1269; WIN32-NEXT:    movl $160, %eax
1270; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1271; WIN32-NEXT:    addl %ecx, %edx
1272; WIN32-NEXT:    setb %al
1273; WIN32-NEXT:    orb %bl, %al
1274; WIN32-NEXT:    orb {{[0-9]+}}(%esp), %al
1275; WIN32-NEXT:    popl %ebx
1276; WIN32-NEXT:    retl
1277  %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160)
; Only the overflow flag (field 1) is consumed; the product is never extracted.
1278  %mul.overflow = extractvalue { i64, i1 } %mul, 1
1279  %x1 = or i1 %c2, %mul.overflow
1280  ret i1 %x1
1281}
1282
; smuloi8_load: signed i8 multiply-with-overflow whose FIRST operand is loaded.
; The IR loads %v1 from %ptr1, multiplies it by %v2 with
; llvm.smul.with.overflow.i8, stores the product to %res and returns the
; overflow flag.
; Expected code: the SDAG and WIN64 check groups want the load folded into the
; memory operand of `imulb` (with %v2 moved into al). The FAST group expects a
; separate `movb (%rdi), %al` load followed by `imulb %sil`. The WIN32 group
; loads the value into al and multiplies by a stack operand.
1283define zeroext i1 @smuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
1284; SDAG-LABEL: smuloi8_load:
1285; SDAG:       # %bb.0:
1286; SDAG-NEXT:    movl %esi, %eax
1287; SDAG-NEXT:    # kill: def $al killed $al killed $eax
1288; SDAG-NEXT:    imulb (%rdi)
1289; SDAG-NEXT:    seto %cl
1290; SDAG-NEXT:    movb %al, (%rdx)
1291; SDAG-NEXT:    movl %ecx, %eax
1292; SDAG-NEXT:    retq
1293;
1294; FAST-LABEL: smuloi8_load:
1295; FAST:       # %bb.0:
1296; FAST-NEXT:    movb (%rdi), %al
1297; FAST-NEXT:    imulb %sil
1298; FAST-NEXT:    seto %cl
1299; FAST-NEXT:    movb %al, (%rdx)
1300; FAST-NEXT:    andb $1, %cl
1301; FAST-NEXT:    movzbl %cl, %eax
1302; FAST-NEXT:    retq
1303;
1304; WIN64-LABEL: smuloi8_load:
1305; WIN64:       # %bb.0:
1306; WIN64-NEXT:    movl %edx, %eax
1307; WIN64-NEXT:    imulb (%rcx)
1308; WIN64-NEXT:    seto %cl
1309; WIN64-NEXT:    movb %al, (%r8)
1310; WIN64-NEXT:    movl %ecx, %eax
1311; WIN64-NEXT:    retq
1312;
1313; WIN32-LABEL: smuloi8_load:
1314; WIN32:       # %bb.0:
1315; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1316; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1317; WIN32-NEXT:    movb (%eax), %al
1318; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
1319; WIN32-NEXT:    seto %cl
1320; WIN32-NEXT:    movb %al, (%edx)
1321; WIN32-NEXT:    movl %ecx, %eax
1322; WIN32-NEXT:    retl
; The loaded value is passed as the first intrinsic operand.
1323  %v1 = load i8, i8* %ptr1
1324  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1325  %val = extractvalue {i8, i1} %t, 0
1326  %obit = extractvalue {i8, i1} %t, 1
1327  store i8 %val, i8* %res
1328  ret i1 %obit
1329}
1330
; smuloi8_load2: signed i8 multiply-with-overflow whose SECOND operand is
; loaded.
; The IR loads %v2 from %ptr2, multiplies %v1 by it with
; llvm.smul.with.overflow.i8, stores the product to %res and returns the
; overflow flag.
; Expected code: every check group (SDAG, FAST, WIN64, WIN32) wants %v1 placed
; in al and the load folded into the memory operand of `imulb`. The FAST group
; additionally masks the flag with `andb $1` and widens it with `movzbl`.
1331define zeroext i1 @smuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) {
1332; SDAG-LABEL: smuloi8_load2:
1333; SDAG:       # %bb.0:
1334; SDAG-NEXT:    movl %edi, %eax
1335; SDAG-NEXT:    # kill: def $al killed $al killed $eax
1336; SDAG-NEXT:    imulb (%rsi)
1337; SDAG-NEXT:    seto %cl
1338; SDAG-NEXT:    movb %al, (%rdx)
1339; SDAG-NEXT:    movl %ecx, %eax
1340; SDAG-NEXT:    retq
1341;
1342; FAST-LABEL: smuloi8_load2:
1343; FAST:       # %bb.0:
1344; FAST-NEXT:    movl %edi, %eax
1345; FAST-NEXT:    # kill: def $al killed $al killed $eax
1346; FAST-NEXT:    imulb (%rsi)
1347; FAST-NEXT:    seto %cl
1348; FAST-NEXT:    movb %al, (%rdx)
1349; FAST-NEXT:    andb $1, %cl
1350; FAST-NEXT:    movzbl %cl, %eax
1351; FAST-NEXT:    retq
1352;
1353; WIN64-LABEL: smuloi8_load2:
1354; WIN64:       # %bb.0:
1355; WIN64-NEXT:    movl %ecx, %eax
1356; WIN64-NEXT:    imulb (%rdx)
1357; WIN64-NEXT:    seto %cl
1358; WIN64-NEXT:    movb %al, (%r8)
1359; WIN64-NEXT:    movl %ecx, %eax
1360; WIN64-NEXT:    retq
1361;
1362; WIN32-LABEL: smuloi8_load2:
1363; WIN32:       # %bb.0:
1364; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1365; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
1366; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1367; WIN32-NEXT:    imulb (%ecx)
1368; WIN32-NEXT:    seto %cl
1369; WIN32-NEXT:    movb %al, (%edx)
1370; WIN32-NEXT:    movl %ecx, %eax
1371; WIN32-NEXT:    retl
; The loaded value is passed as the second intrinsic operand.
1372  %v2 = load i8, i8* %ptr2
1373  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1374  %val = extractvalue {i8, i1} %t, 0
1375  %obit = extractvalue {i8, i1} %t, 1
1376  store i8 %val, i8* %res
1377  ret i1 %obit
1378}
1379
; smuloi16_load: signed i16 multiply-with-overflow whose FIRST operand is
; loaded.
; The IR loads %v1 from %ptr1, multiplies it by %v2 with
; llvm.smul.with.overflow.i16, stores the product to %res and returns the
; overflow flag.
; Expected code: the SDAG, FAST and WIN64 check groups want the load folded
; into a two-operand `imulw (mem), reg`. The WIN32 group expects the value to
; be loaded with `movzwl` first and then multiplied by a stack operand.
1380define zeroext i1 @smuloi16_load(i16* %ptr1, i16 %v2, i16* %res) {
1381; SDAG-LABEL: smuloi16_load:
1382; SDAG:       # %bb.0:
1383; SDAG-NEXT:    imulw (%rdi), %si
1384; SDAG-NEXT:    seto %al
1385; SDAG-NEXT:    movw %si, (%rdx)
1386; SDAG-NEXT:    retq
1387;
1388; FAST-LABEL: smuloi16_load:
1389; FAST:       # %bb.0:
1390; FAST-NEXT:    imulw (%rdi), %si
1391; FAST-NEXT:    seto %al
1392; FAST-NEXT:    movw %si, (%rdx)
1393; FAST-NEXT:    andb $1, %al
1394; FAST-NEXT:    movzbl %al, %eax
1395; FAST-NEXT:    retq
1396;
1397; WIN64-LABEL: smuloi16_load:
1398; WIN64:       # %bb.0:
1399; WIN64-NEXT:    imulw (%rcx), %dx
1400; WIN64-NEXT:    seto %al
1401; WIN64-NEXT:    movw %dx, (%r8)
1402; WIN64-NEXT:    retq
1403;
1404; WIN32-LABEL: smuloi16_load:
1405; WIN32:       # %bb.0:
1406; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1407; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1408; WIN32-NEXT:    movzwl (%eax), %edx
1409; WIN32-NEXT:    imulw {{[0-9]+}}(%esp), %dx
1410; WIN32-NEXT:    seto %al
1411; WIN32-NEXT:    movw %dx, (%ecx)
1412; WIN32-NEXT:    retl
; The loaded value is passed as the first intrinsic operand.
1413  %v1 = load i16, i16* %ptr1
1414  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1415  %val = extractvalue {i16, i1} %t, 0
1416  %obit = extractvalue {i16, i1} %t, 1
1417  store i16 %val, i16* %res
1418  ret i1 %obit
1419}
1420
; smuloi16_load2: signed i16 multiply-with-overflow whose SECOND operand is
; loaded.
; The IR loads %v2 from %ptr2, multiplies %v1 by it with
; llvm.smul.with.overflow.i16, stores the product to %res and returns the
; overflow flag.
; Expected code: every check group (SDAG, FAST, WIN64, WIN32) wants the load
; folded into a two-operand `imulw (mem), reg`, followed by `seto` and a store
; of the product register.
1421define zeroext i1 @smuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) {
1422; SDAG-LABEL: smuloi16_load2:
1423; SDAG:       # %bb.0:
1424; SDAG-NEXT:    imulw (%rsi), %di
1425; SDAG-NEXT:    seto %al
1426; SDAG-NEXT:    movw %di, (%rdx)
1427; SDAG-NEXT:    retq
1428;
1429; FAST-LABEL: smuloi16_load2:
1430; FAST:       # %bb.0:
1431; FAST-NEXT:    imulw (%rsi), %di
1432; FAST-NEXT:    seto %al
1433; FAST-NEXT:    movw %di, (%rdx)
1434; FAST-NEXT:    andb $1, %al
1435; FAST-NEXT:    movzbl %al, %eax
1436; FAST-NEXT:    retq
1437;
1438; WIN64-LABEL: smuloi16_load2:
1439; WIN64:       # %bb.0:
1440; WIN64-NEXT:    imulw (%rdx), %cx
1441; WIN64-NEXT:    seto %al
1442; WIN64-NEXT:    movw %cx, (%r8)
1443; WIN64-NEXT:    retq
1444;
1445; WIN32-LABEL: smuloi16_load2:
1446; WIN32:       # %bb.0:
1447; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1448; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1449; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
1450; WIN32-NEXT:    imulw (%eax), %dx
1451; WIN32-NEXT:    seto %al
1452; WIN32-NEXT:    movw %dx, (%ecx)
1453; WIN32-NEXT:    retl
; The loaded value is passed as the second intrinsic operand.
1454  %v2 = load i16, i16* %ptr2
1455  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1456  %val = extractvalue {i16, i1} %t, 0
1457  %obit = extractvalue {i16, i1} %t, 1
1458  store i16 %val, i16* %res
1459  ret i1 %obit
1460}
1461
; smuloi32_load: signed i32 multiply-with-overflow whose FIRST operand is
; loaded.
; The IR loads %v1 from %ptr1, multiplies it by %v2 with
; llvm.smul.with.overflow.i32, stores the product to %res and returns the
; overflow flag.
; Expected code: the SDAG, FAST and WIN64 check groups want the load folded
; into a two-operand `imull (mem), reg`. The WIN32 group expects the value to
; be loaded with `movl` first and then multiplied by a stack operand.
1462define zeroext i1 @smuloi32_load(i32* %ptr1, i32 %v2, i32* %res) {
1463; SDAG-LABEL: smuloi32_load:
1464; SDAG:       # %bb.0:
1465; SDAG-NEXT:    imull (%rdi), %esi
1466; SDAG-NEXT:    seto %al
1467; SDAG-NEXT:    movl %esi, (%rdx)
1468; SDAG-NEXT:    retq
1469;
1470; FAST-LABEL: smuloi32_load:
1471; FAST:       # %bb.0:
1472; FAST-NEXT:    imull (%rdi), %esi
1473; FAST-NEXT:    seto %al
1474; FAST-NEXT:    movl %esi, (%rdx)
1475; FAST-NEXT:    andb $1, %al
1476; FAST-NEXT:    movzbl %al, %eax
1477; FAST-NEXT:    retq
1478;
1479; WIN64-LABEL: smuloi32_load:
1480; WIN64:       # %bb.0:
1481; WIN64-NEXT:    imull (%rcx), %edx
1482; WIN64-NEXT:    seto %al
1483; WIN64-NEXT:    movl %edx, (%r8)
1484; WIN64-NEXT:    retq
1485;
1486; WIN32-LABEL: smuloi32_load:
1487; WIN32:       # %bb.0:
1488; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1489; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1490; WIN32-NEXT:    movl (%eax), %edx
1491; WIN32-NEXT:    imull {{[0-9]+}}(%esp), %edx
1492; WIN32-NEXT:    seto %al
1493; WIN32-NEXT:    movl %edx, (%ecx)
1494; WIN32-NEXT:    retl
; The loaded value is passed as the first intrinsic operand.
1495  %v1 = load i32, i32* %ptr1
1496  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1497  %val = extractvalue {i32, i1} %t, 0
1498  %obit = extractvalue {i32, i1} %t, 1
1499  store i32 %val, i32* %res
1500  ret i1 %obit
1501}
1502
; smuloi32_load2: signed i32 multiply-with-overflow whose SECOND operand is
; loaded.
; The IR loads %v2 from %ptr2, multiplies %v1 by it with
; llvm.smul.with.overflow.i32, stores the product to %res and returns the
; overflow flag.
; Expected code: every check group (SDAG, FAST, WIN64, WIN32) wants the load
; folded into a two-operand `imull (mem), reg`, followed by `seto` and a store
; of the product register.
1503define zeroext i1 @smuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) {
1504; SDAG-LABEL: smuloi32_load2:
1505; SDAG:       # %bb.0:
1506; SDAG-NEXT:    imull (%rsi), %edi
1507; SDAG-NEXT:    seto %al
1508; SDAG-NEXT:    movl %edi, (%rdx)
1509; SDAG-NEXT:    retq
1510;
1511; FAST-LABEL: smuloi32_load2:
1512; FAST:       # %bb.0:
1513; FAST-NEXT:    imull (%rsi), %edi
1514; FAST-NEXT:    seto %al
1515; FAST-NEXT:    movl %edi, (%rdx)
1516; FAST-NEXT:    andb $1, %al
1517; FAST-NEXT:    movzbl %al, %eax
1518; FAST-NEXT:    retq
1519;
1520; WIN64-LABEL: smuloi32_load2:
1521; WIN64:       # %bb.0:
1522; WIN64-NEXT:    imull (%rdx), %ecx
1523; WIN64-NEXT:    seto %al
1524; WIN64-NEXT:    movl %ecx, (%r8)
1525; WIN64-NEXT:    retq
1526;
1527; WIN32-LABEL: smuloi32_load2:
1528; WIN32:       # %bb.0:
1529; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1530; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1531; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1532; WIN32-NEXT:    imull (%eax), %edx
1533; WIN32-NEXT:    seto %al
1534; WIN32-NEXT:    movl %edx, (%ecx)
1535; WIN32-NEXT:    retl
; The loaded value is passed as the second intrinsic operand.
1536  %v2 = load i32, i32* %ptr2
1537  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1538  %val = extractvalue {i32, i1} %t, 0
1539  %obit = extractvalue {i32, i1} %t, 1
1540  store i32 %val, i32* %res
1541  ret i1 %obit
1542}
1543
; smuloi64_load: signed i64 multiply-with-overflow whose FIRST operand is
; loaded.
; The IR loads %v1 from %ptr1, multiplies it by %v2 with
; llvm.smul.with.overflow.i64, stores the product to %res and returns the
; overflow flag.
; Expected code: the SDAG, FAST and WIN64 check groups want the load folded
; into a two-operand `imulq (mem), reg`. The WIN32 group (i386 triple) expects
; a call to ___mulodi4: a stack slot is zeroed, its address is pushed along
; with the two 64-bit operands (the halves of the loaded value are pushed
; last), and after the call the slot is compared against 0 with `setne` to
; produce the overflow flag.
1544define zeroext i1 @smuloi64_load(i64* %ptr1, i64 %v2, i64* %res) {
1545; SDAG-LABEL: smuloi64_load:
1546; SDAG:       # %bb.0:
1547; SDAG-NEXT:    imulq (%rdi), %rsi
1548; SDAG-NEXT:    seto %al
1549; SDAG-NEXT:    movq %rsi, (%rdx)
1550; SDAG-NEXT:    retq
1551;
1552; FAST-LABEL: smuloi64_load:
1553; FAST:       # %bb.0:
1554; FAST-NEXT:    imulq (%rdi), %rsi
1555; FAST-NEXT:    seto %al
1556; FAST-NEXT:    movq %rsi, (%rdx)
1557; FAST-NEXT:    andb $1, %al
1558; FAST-NEXT:    movzbl %al, %eax
1559; FAST-NEXT:    retq
1560;
1561; WIN64-LABEL: smuloi64_load:
1562; WIN64:       # %bb.0:
1563; WIN64-NEXT:    imulq (%rcx), %rdx
1564; WIN64-NEXT:    seto %al
1565; WIN64-NEXT:    movq %rdx, (%r8)
1566; WIN64-NEXT:    retq
1567;
1568; WIN32-LABEL: smuloi64_load:
1569; WIN32:       # %bb.0:
1570; WIN32-NEXT:    pushl %ebx
1571; WIN32-NEXT:    pushl %edi
1572; WIN32-NEXT:    pushl %esi
1573; WIN32-NEXT:    pushl %eax
1574; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
1575; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1576; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1577; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1578; WIN32-NEXT:    movl (%edx), %edi
1579; WIN32-NEXT:    movl 4(%edx), %edx
1580; WIN32-NEXT:    movl $0, (%esp)
1581; WIN32-NEXT:    movl %esp, %ebx
1582; WIN32-NEXT:    pushl %ebx
1583; WIN32-NEXT:    pushl %ecx
1584; WIN32-NEXT:    pushl %eax
1585; WIN32-NEXT:    pushl %edx
1586; WIN32-NEXT:    pushl %edi
1587; WIN32-NEXT:    calll ___mulodi4
1588; WIN32-NEXT:    addl $20, %esp
1589; WIN32-NEXT:    cmpl $0, (%esp)
1590; WIN32-NEXT:    setne %cl
1591; WIN32-NEXT:    movl %eax, (%esi)
1592; WIN32-NEXT:    movl %edx, 4(%esi)
1593; WIN32-NEXT:    movl %ecx, %eax
1594; WIN32-NEXT:    addl $4, %esp
1595; WIN32-NEXT:    popl %esi
1596; WIN32-NEXT:    popl %edi
1597; WIN32-NEXT:    popl %ebx
1598; WIN32-NEXT:    retl
; The loaded value is passed as the first intrinsic operand.
1599  %v1 = load i64, i64* %ptr1
1600  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1601  %val = extractvalue {i64, i1} %t, 0
1602  %obit = extractvalue {i64, i1} %t, 1
1603  store i64 %val, i64* %res
1604  ret i1 %obit
1605}
1606
; smuloi64_load2: signed i64 multiply-with-overflow whose SECOND operand is
; loaded.
; The IR loads %v2 from %ptr2, multiplies %v1 by it with
; llvm.smul.with.overflow.i64, stores the product to %res and returns the
; overflow flag.
; Expected code: the SDAG, FAST and WIN64 check groups want the load folded
; into a two-operand `imulq (mem), reg`. The WIN32 group (i386 triple) expects
; a call to ___mulodi4: a stack slot is zeroed, its address is pushed along
; with the two 64-bit operands (the halves of the loaded value are pushed
; before those of %v1), and after the call the slot is compared against 0 with
; `setne` to produce the overflow flag.
1607define zeroext i1 @smuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) {
1608; SDAG-LABEL: smuloi64_load2:
1609; SDAG:       # %bb.0:
1610; SDAG-NEXT:    imulq (%rsi), %rdi
1611; SDAG-NEXT:    seto %al
1612; SDAG-NEXT:    movq %rdi, (%rdx)
1613; SDAG-NEXT:    retq
1614;
1615; FAST-LABEL: smuloi64_load2:
1616; FAST:       # %bb.0:
1617; FAST-NEXT:    imulq (%rsi), %rdi
1618; FAST-NEXT:    seto %al
1619; FAST-NEXT:    movq %rdi, (%rdx)
1620; FAST-NEXT:    andb $1, %al
1621; FAST-NEXT:    movzbl %al, %eax
1622; FAST-NEXT:    retq
1623;
1624; WIN64-LABEL: smuloi64_load2:
1625; WIN64:       # %bb.0:
1626; WIN64-NEXT:    imulq (%rdx), %rcx
1627; WIN64-NEXT:    seto %al
1628; WIN64-NEXT:    movq %rcx, (%r8)
1629; WIN64-NEXT:    retq
1630;
1631; WIN32-LABEL: smuloi64_load2:
1632; WIN32:       # %bb.0:
1633; WIN32-NEXT:    pushl %ebx
1634; WIN32-NEXT:    pushl %edi
1635; WIN32-NEXT:    pushl %esi
1636; WIN32-NEXT:    pushl %eax
1637; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
1638; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1639; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1640; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1641; WIN32-NEXT:    movl (%edx), %edi
1642; WIN32-NEXT:    movl 4(%edx), %edx
1643; WIN32-NEXT:    movl $0, (%esp)
1644; WIN32-NEXT:    movl %esp, %ebx
1645; WIN32-NEXT:    pushl %ebx
1646; WIN32-NEXT:    pushl %edx
1647; WIN32-NEXT:    pushl %edi
1648; WIN32-NEXT:    pushl %ecx
1649; WIN32-NEXT:    pushl %eax
1650; WIN32-NEXT:    calll ___mulodi4
1651; WIN32-NEXT:    addl $20, %esp
1652; WIN32-NEXT:    cmpl $0, (%esp)
1653; WIN32-NEXT:    setne %cl
1654; WIN32-NEXT:    movl %eax, (%esi)
1655; WIN32-NEXT:    movl %edx, 4(%esi)
1656; WIN32-NEXT:    movl %ecx, %eax
1657; WIN32-NEXT:    addl $4, %esp
1658; WIN32-NEXT:    popl %esi
1659; WIN32-NEXT:    popl %edi
1660; WIN32-NEXT:    popl %ebx
1661; WIN32-NEXT:    retl
; The loaded value is passed as the second intrinsic operand.
1662  %v2 = load i64, i64* %ptr2
1663  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1664  %val = extractvalue {i64, i1} %t, 0
1665  %obit = extractvalue {i64, i1} %t, 1
1666  store i64 %val, i64* %res
1667  ret i1 %obit
1668}
1669
; umuloi8_load: unsigned i8 multiply-with-overflow whose FIRST operand is
; loaded.
; The IR loads %v1 from %ptr1, multiplies it by %v2 with
; llvm.umul.with.overflow.i8, stores the product to %res and returns the
; overflow flag.
; Expected code: the SDAG and WIN64 check groups want the load folded into the
; memory operand of `mulb` (with %v2 moved into al). The FAST group expects a
; separate `movb (%rdi), %al` load followed by `mulb %sil`. The WIN32 group
; loads the value into al and multiplies by a stack operand.
1670define zeroext i1 @umuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
1671; SDAG-LABEL: umuloi8_load:
1672; SDAG:       # %bb.0:
1673; SDAG-NEXT:    movl %esi, %eax
1674; SDAG-NEXT:    # kill: def $al killed $al killed $eax
1675; SDAG-NEXT:    mulb (%rdi)
1676; SDAG-NEXT:    seto %cl
1677; SDAG-NEXT:    movb %al, (%rdx)
1678; SDAG-NEXT:    movl %ecx, %eax
1679; SDAG-NEXT:    retq
1680;
1681; FAST-LABEL: umuloi8_load:
1682; FAST:       # %bb.0:
1683; FAST-NEXT:    movb (%rdi), %al
1684; FAST-NEXT:    mulb %sil
1685; FAST-NEXT:    seto %cl
1686; FAST-NEXT:    movb %al, (%rdx)
1687; FAST-NEXT:    andb $1, %cl
1688; FAST-NEXT:    movzbl %cl, %eax
1689; FAST-NEXT:    retq
1690;
1691; WIN64-LABEL: umuloi8_load:
1692; WIN64:       # %bb.0:
1693; WIN64-NEXT:    movl %edx, %eax
1694; WIN64-NEXT:    mulb (%rcx)
1695; WIN64-NEXT:    seto %cl
1696; WIN64-NEXT:    movb %al, (%r8)
1697; WIN64-NEXT:    movl %ecx, %eax
1698; WIN64-NEXT:    retq
1699;
1700; WIN32-LABEL: umuloi8_load:
1701; WIN32:       # %bb.0:
1702; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1703; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1704; WIN32-NEXT:    movb (%eax), %al
1705; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
1706; WIN32-NEXT:    seto %cl
1707; WIN32-NEXT:    movb %al, (%edx)
1708; WIN32-NEXT:    movl %ecx, %eax
1709; WIN32-NEXT:    retl
; The loaded value is passed as the first intrinsic operand.
1710  %v1 = load i8, i8* %ptr1
1711  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1712  %val = extractvalue {i8, i1} %t, 0
1713  %obit = extractvalue {i8, i1} %t, 1
1714  store i8 %val, i8* %res
1715  ret i1 %obit
1716}
1717
; umuloi8_load2: unsigned i8 multiply-with-overflow whose SECOND operand is
; loaded.
; The IR loads %v2 from %ptr2, multiplies %v1 by it with
; llvm.umul.with.overflow.i8, stores the product to %res and returns the
; overflow flag.
; Expected code: every check group (SDAG, FAST, WIN64, WIN32) wants %v1 placed
; in al and the load folded into the memory operand of `mulb`. The FAST group
; additionally masks the flag with `andb $1` and widens it with `movzbl`.
1718define zeroext i1 @umuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) {
1719; SDAG-LABEL: umuloi8_load2:
1720; SDAG:       # %bb.0:
1721; SDAG-NEXT:    movl %edi, %eax
1722; SDAG-NEXT:    # kill: def $al killed $al killed $eax
1723; SDAG-NEXT:    mulb (%rsi)
1724; SDAG-NEXT:    seto %cl
1725; SDAG-NEXT:    movb %al, (%rdx)
1726; SDAG-NEXT:    movl %ecx, %eax
1727; SDAG-NEXT:    retq
1728;
1729; FAST-LABEL: umuloi8_load2:
1730; FAST:       # %bb.0:
1731; FAST-NEXT:    movl %edi, %eax
1732; FAST-NEXT:    # kill: def $al killed $al killed $eax
1733; FAST-NEXT:    mulb (%rsi)
1734; FAST-NEXT:    seto %cl
1735; FAST-NEXT:    movb %al, (%rdx)
1736; FAST-NEXT:    andb $1, %cl
1737; FAST-NEXT:    movzbl %cl, %eax
1738; FAST-NEXT:    retq
1739;
1740; WIN64-LABEL: umuloi8_load2:
1741; WIN64:       # %bb.0:
1742; WIN64-NEXT:    movl %ecx, %eax
1743; WIN64-NEXT:    mulb (%rdx)
1744; WIN64-NEXT:    seto %cl
1745; WIN64-NEXT:    movb %al, (%r8)
1746; WIN64-NEXT:    movl %ecx, %eax
1747; WIN64-NEXT:    retq
1748;
1749; WIN32-LABEL: umuloi8_load2:
1750; WIN32:       # %bb.0:
1751; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
1752; WIN32-NEXT:    movb {{[0-9]+}}(%esp), %al
1753; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1754; WIN32-NEXT:    mulb (%ecx)
1755; WIN32-NEXT:    seto %cl
1756; WIN32-NEXT:    movb %al, (%edx)
1757; WIN32-NEXT:    movl %ecx, %eax
1758; WIN32-NEXT:    retl
; The loaded value is passed as the second intrinsic operand.
1759  %v2 = load i8, i8* %ptr2
1760  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1761  %val = extractvalue {i8, i1} %t, 0
1762  %obit = extractvalue {i8, i1} %t, 1
1763  store i8 %val, i8* %res
1764  ret i1 %obit
1765}
1766
; umuloi16_load: unsigned i16 multiply-with-overflow whose FIRST operand is
; loaded.
; The IR loads %v1 from %ptr1, multiplies it by %v2 with
; llvm.umul.with.overflow.i16, stores the product to %res and returns the
; overflow flag.
; Expected code: the SDAG and WIN64 check groups want the load folded into the
; memory operand of `mulw`; the FAST group expects a separate `movzwl` load
; followed by `mulw %si`. On x86_64-linux both SDAG and FAST first copy rdx to
; rcx and store the product through rcx (the one-operand `mulw` writes dx per
; the x86 ISA, and dl then receives the `seto` result). The WIN32 group saves
; esi and uses it to hold the destination pointer.
1767define zeroext i1 @umuloi16_load(i16* %ptr1, i16 %v2, i16* %res) {
1768; SDAG-LABEL: umuloi16_load:
1769; SDAG:       # %bb.0:
1770; SDAG-NEXT:    movq %rdx, %rcx
1771; SDAG-NEXT:    movl %esi, %eax
1772; SDAG-NEXT:    # kill: def $ax killed $ax killed $eax
1773; SDAG-NEXT:    mulw (%rdi)
1774; SDAG-NEXT:    seto %dl
1775; SDAG-NEXT:    movw %ax, (%rcx)
1776; SDAG-NEXT:    movl %edx, %eax
1777; SDAG-NEXT:    retq
1778;
1779; FAST-LABEL: umuloi16_load:
1780; FAST:       # %bb.0:
1781; FAST-NEXT:    movq %rdx, %rcx
1782; FAST-NEXT:    movzwl (%rdi), %eax
1783; FAST-NEXT:    mulw %si
1784; FAST-NEXT:    seto %dl
1785; FAST-NEXT:    movw %ax, (%rcx)
1786; FAST-NEXT:    andb $1, %dl
1787; FAST-NEXT:    movzbl %dl, %eax
1788; FAST-NEXT:    retq
1789;
1790; WIN64-LABEL: umuloi16_load:
1791; WIN64:       # %bb.0:
1792; WIN64-NEXT:    movl %edx, %eax
1793; WIN64-NEXT:    mulw (%rcx)
1794; WIN64-NEXT:    seto %cl
1795; WIN64-NEXT:    movw %ax, (%r8)
1796; WIN64-NEXT:    movl %ecx, %eax
1797; WIN64-NEXT:    retq
1798;
1799; WIN32-LABEL: umuloi16_load:
1800; WIN32:       # %bb.0:
1801; WIN32-NEXT:    pushl %esi
1802; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
1803; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1804; WIN32-NEXT:    movzwl (%eax), %eax
1805; WIN32-NEXT:    mulw {{[0-9]+}}(%esp)
1806; WIN32-NEXT:    seto %cl
1807; WIN32-NEXT:    movw %ax, (%esi)
1808; WIN32-NEXT:    movl %ecx, %eax
1809; WIN32-NEXT:    popl %esi
1810; WIN32-NEXT:    retl
; The loaded value is passed as the first intrinsic operand.
1811  %v1 = load i16, i16* %ptr1
1812  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1813  %val = extractvalue {i16, i1} %t, 0
1814  %obit = extractvalue {i16, i1} %t, 1
1815  store i16 %val, i16* %res
1816  ret i1 %obit
1817}
1818
; umuloi16_load2: unsigned i16 multiply-with-overflow whose SECOND operand is
; loaded.
; The IR loads %v2 from %ptr2, multiplies %v1 by it with
; llvm.umul.with.overflow.i16, stores the product to %res and returns the
; overflow flag.
; Expected code: every check group (SDAG, FAST, WIN64, WIN32) wants %v1 placed
; in ax and the load folded into the memory operand of `mulw`. On x86_64-linux
; both SDAG and FAST first copy rdx to rcx and store the product through rcx
; (the one-operand `mulw` writes dx per the x86 ISA, and dl then receives the
; `seto` result). The WIN32 group saves esi and uses it to hold the
; destination pointer.
1819define zeroext i1 @umuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) {
1820; SDAG-LABEL: umuloi16_load2:
1821; SDAG:       # %bb.0:
1822; SDAG-NEXT:    movq %rdx, %rcx
1823; SDAG-NEXT:    movl %edi, %eax
1824; SDAG-NEXT:    # kill: def $ax killed $ax killed $eax
1825; SDAG-NEXT:    mulw (%rsi)
1826; SDAG-NEXT:    seto %dl
1827; SDAG-NEXT:    movw %ax, (%rcx)
1828; SDAG-NEXT:    movl %edx, %eax
1829; SDAG-NEXT:    retq
1830;
1831; FAST-LABEL: umuloi16_load2:
1832; FAST:       # %bb.0:
1833; FAST-NEXT:    movq %rdx, %rcx
1834; FAST-NEXT:    movl %edi, %eax
1835; FAST-NEXT:    # kill: def $ax killed $ax killed $eax
1836; FAST-NEXT:    mulw (%rsi)
1837; FAST-NEXT:    seto %dl
1838; FAST-NEXT:    movw %ax, (%rcx)
1839; FAST-NEXT:    andb $1, %dl
1840; FAST-NEXT:    movzbl %dl, %eax
1841; FAST-NEXT:    retq
1842;
1843; WIN64-LABEL: umuloi16_load2:
1844; WIN64:       # %bb.0:
1845; WIN64-NEXT:    movl %ecx, %eax
1846; WIN64-NEXT:    mulw (%rdx)
1847; WIN64-NEXT:    seto %cl
1848; WIN64-NEXT:    movw %ax, (%r8)
1849; WIN64-NEXT:    movl %ecx, %eax
1850; WIN64-NEXT:    retq
1851;
1852; WIN32-LABEL: umuloi16_load2:
1853; WIN32:       # %bb.0:
1854; WIN32-NEXT:    pushl %esi
1855; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
1856; WIN32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
1857; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1858; WIN32-NEXT:    mulw (%ecx)
1859; WIN32-NEXT:    seto %cl
1860; WIN32-NEXT:    movw %ax, (%esi)
1861; WIN32-NEXT:    movl %ecx, %eax
1862; WIN32-NEXT:    popl %esi
1863; WIN32-NEXT:    retl
; The loaded value is passed as the second intrinsic operand.
1864  %v2 = load i16, i16* %ptr2
1865  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1866  %val = extractvalue {i16, i1} %t, 0
1867  %obit = extractvalue {i16, i1} %t, 1
1868  store i16 %val, i16* %res
1869  ret i1 %obit
1870}
1871
; umuloi32_load: unsigned i32 multiply-with-overflow whose FIRST operand is
; loaded.
; The IR loads %v1 from %ptr1, multiplies it by %v2 with
; llvm.umul.with.overflow.i32, stores the product to %res and returns the
; overflow flag.
; Expected code: the SDAG and WIN64 check groups want the load folded into the
; memory operand of `mull`; the FAST group expects a separate `movl` load
; followed by `mull %esi`. On x86_64-linux both SDAG and FAST first copy rdx
; to rcx and store the product through rcx (the one-operand `mull` writes edx
; per the x86 ISA, and dl then receives the `seto` result). The WIN32 group
; saves esi and uses it to hold the destination pointer.
1872define zeroext i1 @umuloi32_load(i32* %ptr1, i32 %v2, i32* %res) {
1873; SDAG-LABEL: umuloi32_load:
1874; SDAG:       # %bb.0:
1875; SDAG-NEXT:    movq %rdx, %rcx
1876; SDAG-NEXT:    movl %esi, %eax
1877; SDAG-NEXT:    mull (%rdi)
1878; SDAG-NEXT:    seto %dl
1879; SDAG-NEXT:    movl %eax, (%rcx)
1880; SDAG-NEXT:    movl %edx, %eax
1881; SDAG-NEXT:    retq
1882;
1883; FAST-LABEL: umuloi32_load:
1884; FAST:       # %bb.0:
1885; FAST-NEXT:    movq %rdx, %rcx
1886; FAST-NEXT:    movl (%rdi), %eax
1887; FAST-NEXT:    mull %esi
1888; FAST-NEXT:    seto %dl
1889; FAST-NEXT:    movl %eax, (%rcx)
1890; FAST-NEXT:    andb $1, %dl
1891; FAST-NEXT:    movzbl %dl, %eax
1892; FAST-NEXT:    retq
1893;
1894; WIN64-LABEL: umuloi32_load:
1895; WIN64:       # %bb.0:
1896; WIN64-NEXT:    movl %edx, %eax
1897; WIN64-NEXT:    mull (%rcx)
1898; WIN64-NEXT:    seto %cl
1899; WIN64-NEXT:    movl %eax, (%r8)
1900; WIN64-NEXT:    movl %ecx, %eax
1901; WIN64-NEXT:    retq
1902;
1903; WIN32-LABEL: umuloi32_load:
1904; WIN32:       # %bb.0:
1905; WIN32-NEXT:    pushl %esi
1906; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
1907; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1908; WIN32-NEXT:    movl (%eax), %eax
1909; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
1910; WIN32-NEXT:    seto %cl
1911; WIN32-NEXT:    movl %eax, (%esi)
1912; WIN32-NEXT:    movl %ecx, %eax
1913; WIN32-NEXT:    popl %esi
1914; WIN32-NEXT:    retl
; The loaded value is passed as the first intrinsic operand.
1915  %v1 = load i32, i32* %ptr1
1916  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1917  %val = extractvalue {i32, i1} %t, 0
1918  %obit = extractvalue {i32, i1} %t, 1
1919  store i32 %val, i32* %res
1920  ret i1 %obit
1921}
1922
; umuloi32_load2: unsigned i32 multiply-with-overflow whose SECOND operand is
; loaded.
; The IR loads %v2 from %ptr2, multiplies %v1 by it with
; llvm.umul.with.overflow.i32, stores the product to %res and returns the
; overflow flag.
; Expected code: every check group (SDAG, FAST, WIN64, WIN32) wants %v1 placed
; in eax and the load folded into the memory operand of `mull`. On
; x86_64-linux both SDAG and FAST first copy rdx to rcx and store the product
; through rcx (the one-operand `mull` writes edx per the x86 ISA, and dl then
; receives the `seto` result). The WIN32 group saves esi and uses it to hold
; the destination pointer.
1923define zeroext i1 @umuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) {
1924; SDAG-LABEL: umuloi32_load2:
1925; SDAG:       # %bb.0:
1926; SDAG-NEXT:    movq %rdx, %rcx
1927; SDAG-NEXT:    movl %edi, %eax
1928; SDAG-NEXT:    mull (%rsi)
1929; SDAG-NEXT:    seto %dl
1930; SDAG-NEXT:    movl %eax, (%rcx)
1931; SDAG-NEXT:    movl %edx, %eax
1932; SDAG-NEXT:    retq
1933;
1934; FAST-LABEL: umuloi32_load2:
1935; FAST:       # %bb.0:
1936; FAST-NEXT:    movq %rdx, %rcx
1937; FAST-NEXT:    movl %edi, %eax
1938; FAST-NEXT:    mull (%rsi)
1939; FAST-NEXT:    seto %dl
1940; FAST-NEXT:    movl %eax, (%rcx)
1941; FAST-NEXT:    andb $1, %dl
1942; FAST-NEXT:    movzbl %dl, %eax
1943; FAST-NEXT:    retq
1944;
1945; WIN64-LABEL: umuloi32_load2:
1946; WIN64:       # %bb.0:
1947; WIN64-NEXT:    movl %ecx, %eax
1948; WIN64-NEXT:    mull (%rdx)
1949; WIN64-NEXT:    seto %cl
1950; WIN64-NEXT:    movl %eax, (%r8)
1951; WIN64-NEXT:    movl %ecx, %eax
1952; WIN64-NEXT:    retq
1953;
1954; WIN32-LABEL: umuloi32_load2:
1955; WIN32:       # %bb.0:
1956; WIN32-NEXT:    pushl %esi
1957; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
1958; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1959; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1960; WIN32-NEXT:    mull (%ecx)
1961; WIN32-NEXT:    seto %cl
1962; WIN32-NEXT:    movl %eax, (%esi)
1963; WIN32-NEXT:    movl %ecx, %eax
1964; WIN32-NEXT:    popl %esi
1965; WIN32-NEXT:    retl
; The loaded value is passed as the second intrinsic operand.
1966  %v2 = load i32, i32* %ptr2
1967  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
1968  %val = extractvalue {i32, i1} %t, 0
1969  %obit = extractvalue {i32, i1} %t, 1
1970  store i32 %val, i32* %res
1971  ret i1 %obit
1972}
1973
; umuloi64_load: unsigned i64 multiply-with-overflow whose FIRST operand is
; loaded.
; The IR loads %v1 from %ptr1, multiplies it by %v2 with
; llvm.umul.with.overflow.i64, stores the product to %res and returns the
; overflow flag.
; Expected code: the SDAG and WIN64 check groups want the load folded into the
; memory operand of `mulq`; the FAST group expects a separate `movq` load
; followed by `mulq %rsi`. On x86_64-linux both SDAG and FAST first copy rdx
; to rcx and store the product through rcx. The WIN32 group (i386 triple)
; expects no library call: it lists three 32-bit `mull` instructions, spills
; one `seto` result to a 1-byte stack slot and reloads it with `orb`, then
; ORs in a `setb` carry and the AND of two `setne` tests.
1974define zeroext i1 @umuloi64_load(i64* %ptr1, i64 %v2, i64* %res) {
1975; SDAG-LABEL: umuloi64_load:
1976; SDAG:       # %bb.0:
1977; SDAG-NEXT:    movq %rdx, %rcx
1978; SDAG-NEXT:    movq %rsi, %rax
1979; SDAG-NEXT:    mulq (%rdi)
1980; SDAG-NEXT:    seto %dl
1981; SDAG-NEXT:    movq %rax, (%rcx)
1982; SDAG-NEXT:    movl %edx, %eax
1983; SDAG-NEXT:    retq
1984;
1985; FAST-LABEL: umuloi64_load:
1986; FAST:       # %bb.0:
1987; FAST-NEXT:    movq %rdx, %rcx
1988; FAST-NEXT:    movq (%rdi), %rax
1989; FAST-NEXT:    mulq %rsi
1990; FAST-NEXT:    seto %dl
1991; FAST-NEXT:    movq %rax, (%rcx)
1992; FAST-NEXT:    andb $1, %dl
1993; FAST-NEXT:    movzbl %dl, %eax
1994; FAST-NEXT:    retq
1995;
1996; WIN64-LABEL: umuloi64_load:
1997; WIN64:       # %bb.0:
1998; WIN64-NEXT:    movq %rdx, %rax
1999; WIN64-NEXT:    mulq (%rcx)
2000; WIN64-NEXT:    seto %cl
2001; WIN64-NEXT:    movq %rax, (%r8)
2002; WIN64-NEXT:    movl %ecx, %eax
2003; WIN64-NEXT:    retq
2004;
2005; WIN32-LABEL: umuloi64_load:
2006; WIN32:       # %bb.0:
2007; WIN32-NEXT:    pushl %ebp
2008; WIN32-NEXT:    pushl %ebx
2009; WIN32-NEXT:    pushl %edi
2010; WIN32-NEXT:    pushl %esi
2011; WIN32-NEXT:    pushl %eax
2012; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
2013; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2014; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2015; WIN32-NEXT:    movl (%eax), %esi
2016; WIN32-NEXT:    movl 4(%eax), %eax
2017; WIN32-NEXT:    testl %ecx, %ecx
2018; WIN32-NEXT:    setne %dl
2019; WIN32-NEXT:    testl %eax, %eax
2020; WIN32-NEXT:    setne %bl
2021; WIN32-NEXT:    andb %dl, %bl
2022; WIN32-NEXT:    mull %ebp
2023; WIN32-NEXT:    movl %eax, %edi
2024; WIN32-NEXT:    seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
2025; WIN32-NEXT:    movl %ecx, %eax
2026; WIN32-NEXT:    mull %esi
2027; WIN32-NEXT:    movl %eax, %ecx
2028; WIN32-NEXT:    seto %bh
2029; WIN32-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
2030; WIN32-NEXT:    addl %edi, %ecx
2031; WIN32-NEXT:    movl %esi, %eax
2032; WIN32-NEXT:    mull %ebp
2033; WIN32-NEXT:    addl %ecx, %edx
2034; WIN32-NEXT:    setb %cl
2035; WIN32-NEXT:    orb %bh, %cl
2036; WIN32-NEXT:    orb %bl, %cl
2037; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
2038; WIN32-NEXT:    movl %eax, (%esi)
2039; WIN32-NEXT:    movl %edx, 4(%esi)
2040; WIN32-NEXT:    movl %ecx, %eax
2041; WIN32-NEXT:    addl $4, %esp
2042; WIN32-NEXT:    popl %esi
2043; WIN32-NEXT:    popl %edi
2044; WIN32-NEXT:    popl %ebx
2045; WIN32-NEXT:    popl %ebp
2046; WIN32-NEXT:    retl
; The loaded value is passed as the first intrinsic operand.
2047  %v1 = load i64, i64* %ptr1
2048  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
; Field 0 is the product (stored to %res); field 1 is the overflow flag
; (returned).
2049  %val = extractvalue {i64, i1} %t, 0
2050  %obit = extractvalue {i64, i1} %t, 1
2051  store i64 %val, i64* %res
2052  ret i1 %obit
2053}
2054
; Test of an unsigned 64-bit multiply-with-overflow whose second operand is
; loaded from memory (%ptr2) and whose first operand (%v1) is passed by value.
; The product is stored through %res and the overflow bit is returned as a
; zero-extended i1.
; In the expected output below, the SelectionDAG, fast-isel and 64-bit Windows
; runs all use the memory location directly as the mulq operand (fast-isel
; additionally masks the flag byte with andb/movzbl), and the 32-bit Windows run
; expands the i64 multiply into three 32-bit mull instructions whose partial
; overflow conditions are OR-ed together.
2055define zeroext i1 @umuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) {
2056; SDAG-LABEL: umuloi64_load2:
2057; SDAG:       # %bb.0:
2058; SDAG-NEXT:    movq %rdx, %rcx
2059; SDAG-NEXT:    movq %rdi, %rax
2060; SDAG-NEXT:    mulq (%rsi)
2061; SDAG-NEXT:    seto %dl
2062; SDAG-NEXT:    movq %rax, (%rcx)
2063; SDAG-NEXT:    movl %edx, %eax
2064; SDAG-NEXT:    retq
2065;
2066; FAST-LABEL: umuloi64_load2:
2067; FAST:       # %bb.0:
2068; FAST-NEXT:    movq %rdx, %rcx
2069; FAST-NEXT:    movq %rdi, %rax
2070; FAST-NEXT:    mulq (%rsi)
2071; FAST-NEXT:    seto %dl
2072; FAST-NEXT:    movq %rax, (%rcx)
2073; FAST-NEXT:    andb $1, %dl
2074; FAST-NEXT:    movzbl %dl, %eax
2075; FAST-NEXT:    retq
2076;
2077; WIN64-LABEL: umuloi64_load2:
2078; WIN64:       # %bb.0:
2079; WIN64-NEXT:    movq %rcx, %rax
2080; WIN64-NEXT:    mulq (%rdx)
2081; WIN64-NEXT:    seto %cl
2082; WIN64-NEXT:    movq %rax, (%r8)
2083; WIN64-NEXT:    movl %ecx, %eax
2084; WIN64-NEXT:    retq
2085;
2086; WIN32-LABEL: umuloi64_load2:
2087; WIN32:       # %bb.0:
2088; WIN32-NEXT:    pushl %ebp
2089; WIN32-NEXT:    pushl %ebx
2090; WIN32-NEXT:    pushl %edi
2091; WIN32-NEXT:    pushl %esi
2092; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2093; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
2094; WIN32-NEXT:    movl (%edx), %ebp
2095; WIN32-NEXT:    movl 4(%edx), %esi
2096; WIN32-NEXT:    testl %eax, %eax
2097; WIN32-NEXT:    setne %dl
2098; WIN32-NEXT:    testl %esi, %esi
2099; WIN32-NEXT:    setne %bl
2100; WIN32-NEXT:    andb %dl, %bl
2101; WIN32-NEXT:    mull %ebp
2102; WIN32-NEXT:    movl %eax, %edi
2103; WIN32-NEXT:    seto %cl
2104; WIN32-NEXT:    movl %esi, %eax
2105; WIN32-NEXT:    mull {{[0-9]+}}(%esp)
2106; WIN32-NEXT:    movl %eax, %esi
2107; WIN32-NEXT:    seto %ch
2108; WIN32-NEXT:    orb %cl, %ch
2109; WIN32-NEXT:    addl %edi, %esi
2110; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
2111; WIN32-NEXT:    mull %ebp
2112; WIN32-NEXT:    addl %esi, %edx
2113; WIN32-NEXT:    setb %cl
2114; WIN32-NEXT:    orb %ch, %cl
2115; WIN32-NEXT:    orb %bl, %cl
2116; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
2117; WIN32-NEXT:    movl %eax, (%esi)
2118; WIN32-NEXT:    movl %edx, 4(%esi)
2119; WIN32-NEXT:    movl %ecx, %eax
2120; WIN32-NEXT:    popl %esi
2121; WIN32-NEXT:    popl %edi
2122; WIN32-NEXT:    popl %ebx
2123; WIN32-NEXT:    popl %ebp
2124; WIN32-NEXT:    retl
2125  %v2 = load i64, i64* %ptr2
2126  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
2127  %val = extractvalue {i64, i1} %t, 0
2128  %obit = extractvalue {i64, i1} %t, 1
2129  store i64 %val, i64* %res
2130  ret i1 %obit
2131}
2132
; Declarations of the signed (smul) and unsigned (umul) multiply-with-overflow
; intrinsics at widths i8, i16, i32 and i64. Each takes two integers of the
; stated width and returns a pair of the same-width result and an i1, which the
; tests in this file extract as the product and the overflow bit.
2133declare {i8,  i1} @llvm.smul.with.overflow.i8 (i8,  i8 ) nounwind readnone
2134declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
2135declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
2136declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
2137declare {i8,  i1} @llvm.umul.with.overflow.i8 (i8,  i8 ) nounwind readnone
2138declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
2139declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
2140declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
2141
; Branch-weight metadata node with the two weights 0 and 2147483647.
; NOTE(review) - its users are outside this section; presumably it is attached
; as !prof to conditional branches in earlier tests. Confirm before changing.
2142!0 = !{!"branch_weights", i32 0, i32 2147483647}
2143