1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG 3; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,LINUX,FAST 4; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -mcpu=knl < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG 5; RUN: llc -disable-peephole -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefixes=CHECK,WIN64 6; RUN: llc -disable-peephole -mtriple=i386-pc-win32 < %s | FileCheck %s --check-prefix=WIN32 7 8define {i64, i1} @t1() nounwind { 9; CHECK-LABEL: t1: 10; CHECK: # %bb.0: 11; CHECK-NEXT: movl $72, %eax 12; CHECK-NEXT: xorl %edx, %edx 13; CHECK-NEXT: retq 14; 15; WIN32-LABEL: t1: 16; WIN32: # %bb.0: 17; WIN32-NEXT: movl $72, %eax 18; WIN32-NEXT: xorl %edx, %edx 19; WIN32-NEXT: xorl %ecx, %ecx 20; WIN32-NEXT: retl 21 %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 8) 22 ret {i64, i1} %1 23} 24 25define {i64, i1} @t2() nounwind { 26; CHECK-LABEL: t2: 27; CHECK: # %bb.0: 28; CHECK-NEXT: xorl %eax, %eax 29; CHECK-NEXT: xorl %edx, %edx 30; CHECK-NEXT: retq 31; 32; WIN32-LABEL: t2: 33; WIN32: # %bb.0: 34; WIN32-NEXT: xorl %eax, %eax 35; WIN32-NEXT: xorl %edx, %edx 36; WIN32-NEXT: xorl %ecx, %ecx 37; WIN32-NEXT: retl 38 %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 0) 39 ret {i64, i1} %1 40} 41 42define {i64, i1} @t3() nounwind { 43; CHECK-LABEL: t3: 44; CHECK: # %bb.0: 45; CHECK-NEXT: movq $-9, %rax 46; CHECK-NEXT: movb $1, %dl 47; CHECK-NEXT: retq 48; 49; WIN32-LABEL: t3: 50; WIN32: # %bb.0: 51; WIN32-NEXT: movl $-9, %eax 52; WIN32-NEXT: movl $-1, %edx 53; WIN32-NEXT: movb $1, %cl 54; WIN32-NEXT: retl 55 %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 -1) 56 ret {i64, i1} %1 57} 58 59; SMULO 60define zeroext i1 @smuloi8(i8 %v1, i8 %v2, i8* %res) { 61; SDAG-LABEL: smuloi8: 62; SDAG: # %bb.0: 63; SDAG-NEXT: movl %edi, %eax 64; SDAG-NEXT: # kill: def $al killed $al killed $eax 65; SDAG-NEXT: imulb %sil 66; SDAG-NEXT: seto %cl 67; SDAG-NEXT: movb %al, (%rdx) 68; SDAG-NEXT: movl %ecx, %eax 69; SDAG-NEXT: retq 70; 71; FAST-LABEL: smuloi8: 72; FAST: # %bb.0: 73; FAST-NEXT: movl %edi, %eax 74; FAST-NEXT: # kill: def $al killed $al killed $eax 75; FAST-NEXT: imulb %sil 76; FAST-NEXT: seto %cl 77; FAST-NEXT: movb %al, (%rdx) 78; FAST-NEXT: andb $1, %cl 79; FAST-NEXT: movzbl %cl, %eax 80; FAST-NEXT: retq 81; 82; WIN64-LABEL: smuloi8: 83; WIN64: # %bb.0: 84; WIN64-NEXT: movl %ecx, %eax 85; WIN64-NEXT: imulb %dl 86; WIN64-NEXT: seto %cl 87; WIN64-NEXT: movb %al, (%r8) 88; WIN64-NEXT: movl %ecx, %eax 89; WIN64-NEXT: retq 90; 91; WIN32-LABEL: smuloi8: 92; WIN32: # %bb.0: 93; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 94; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al 95; WIN32-NEXT: imulb {{[0-9]+}}(%esp) 96; WIN32-NEXT: seto %cl 97; WIN32-NEXT: movb %al, (%edx) 98; WIN32-NEXT: movl %ecx, %eax 99; WIN32-NEXT: retl 100 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2) 101 %val = extractvalue {i8, i1} %t, 0 102 %obit = extractvalue {i8, i1} %t, 1 103 store i8 %val, i8* %res 104 ret i1 %obit 105} 106 107define zeroext i1 @smuloi16(i16 %v1, i16 %v2, i16* %res) { 108; SDAG-LABEL: smuloi16: 109; SDAG: # %bb.0: 110; SDAG-NEXT: imulw %si, %di 111; SDAG-NEXT: seto %al 112; SDAG-NEXT: movw %di, (%rdx) 113; SDAG-NEXT: retq 114; 115; FAST-LABEL: smuloi16: 116; FAST: # %bb.0: 117; FAST-NEXT: imulw %si, %di 118; FAST-NEXT: seto %al 119; FAST-NEXT: movw %di, (%rdx) 120; FAST-NEXT: andb $1, %al 121; FAST-NEXT: movzbl %al, %eax 122; FAST-NEXT: retq 123; 124; WIN64-LABEL: smuloi16: 125; WIN64: # %bb.0: 126; WIN64-NEXT: imulw %dx, %cx 127; WIN64-NEXT: seto %al 128; WIN64-NEXT: movw %cx, (%r8) 129; WIN64-NEXT: retq 130; 131; WIN32-LABEL: smuloi16: 132; WIN32: # %bb.0: 133; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 134; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %edx 135; WIN32-NEXT: imulw {{[0-9]+}}(%esp), %dx 136; WIN32-NEXT: seto %al 137; WIN32-NEXT: movw %dx, (%ecx) 138; WIN32-NEXT: retl 139 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2) 140 %val = extractvalue {i16, i1} %t, 0 141 %obit = extractvalue {i16, i1} %t, 1 142 store i16 %val, i16* %res 143 ret i1 %obit 144} 145 146define zeroext i1 @smuloi32(i32 %v1, i32 %v2, i32* %res) { 147; SDAG-LABEL: smuloi32: 148; SDAG: # %bb.0: 149; SDAG-NEXT: imull %esi, %edi 150; SDAG-NEXT: seto %al 151; SDAG-NEXT: movl %edi, (%rdx) 152; SDAG-NEXT: retq 153; 154; FAST-LABEL: smuloi32: 155; FAST: # %bb.0: 156; FAST-NEXT: imull %esi, %edi 157; FAST-NEXT: seto %al 158; FAST-NEXT: movl %edi, (%rdx) 159; FAST-NEXT: andb $1, %al 160; FAST-NEXT: movzbl %al, %eax 161; FAST-NEXT: retq 162; 163; WIN64-LABEL: smuloi32: 164; WIN64: # %bb.0: 165; WIN64-NEXT: imull %edx, %ecx 166; WIN64-NEXT: seto %al 167; WIN64-NEXT: movl %ecx, (%r8) 168; WIN64-NEXT: retq 169; 170; WIN32-LABEL: smuloi32: 171; WIN32: # %bb.0: 172; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 173; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 174; WIN32-NEXT: imull {{[0-9]+}}(%esp), %edx 175; WIN32-NEXT: seto %al 176; WIN32-NEXT: movl %edx, (%ecx) 177; WIN32-NEXT: retl 178 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) 179 %val = extractvalue {i32, i1} %t, 0 180 %obit = extractvalue {i32, i1} %t, 1 181 store i32 %val, i32* %res 182 ret i1 %obit 183} 184 185define zeroext i1 @smuloi64(i64 %v1, i64 %v2, i64* %res) { 186; SDAG-LABEL: smuloi64: 187; SDAG: # %bb.0: 188; SDAG-NEXT: imulq %rsi, %rdi 189; SDAG-NEXT: seto %al 190; SDAG-NEXT: movq %rdi, (%rdx) 191; SDAG-NEXT: retq 192; 193; FAST-LABEL: smuloi64: 194; FAST: # %bb.0: 195; FAST-NEXT: imulq %rsi, %rdi 196; FAST-NEXT: seto %al 197; FAST-NEXT: movq %rdi, (%rdx) 198; FAST-NEXT: andb $1, %al 199; FAST-NEXT: movzbl %al, %eax 200; FAST-NEXT: retq 201; 202; WIN64-LABEL: smuloi64: 203; WIN64: # %bb.0: 204; WIN64-NEXT: imulq %rdx, %rcx 205; WIN64-NEXT: seto %al 206; WIN64-NEXT: movq %rcx, (%r8) 207; WIN64-NEXT: retq 208; 209; WIN32-LABEL: smuloi64: 210; WIN32: # %bb.0: 211; WIN32-NEXT: pushl %ebx 212; WIN32-NEXT: pushl %edi 213; WIN32-NEXT: pushl %esi 214; WIN32-NEXT: pushl %eax 215; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 216; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 217; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 218; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 219; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi 220; WIN32-NEXT: movl $0, (%esp) 221; WIN32-NEXT: movl %esp, %ebx 222; WIN32-NEXT: pushl %ebx 223; WIN32-NEXT: pushl %edi 224; WIN32-NEXT: pushl %edx 225; WIN32-NEXT: pushl %ecx 226; WIN32-NEXT: pushl %eax 227; WIN32-NEXT: calll ___mulodi4 228; WIN32-NEXT: addl $20, %esp 229; WIN32-NEXT: cmpl $0, (%esp) 230; WIN32-NEXT: setne %cl 231; WIN32-NEXT: movl %edx, 4(%esi) 232; WIN32-NEXT: movl %eax, (%esi) 233; WIN32-NEXT: movl %ecx, %eax 234; WIN32-NEXT: addl $4, %esp 235; WIN32-NEXT: popl %esi 236; WIN32-NEXT: popl %edi 237; WIN32-NEXT: popl %ebx 238; WIN32-NEXT: retl 239 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) 240 %val = extractvalue {i64, i1} %t, 0 241 %obit = extractvalue {i64, i1} %t, 1 242 store i64 %val, i64* %res 243 ret i1 %obit 244} 245 246; UMULO 247define zeroext i1 @umuloi8(i8 %v1, i8 %v2, i8* %res) { 248; SDAG-LABEL: umuloi8: 249; SDAG: # %bb.0: 250; SDAG-NEXT: movl %edi, %eax 251; SDAG-NEXT: # kill: def $al killed $al killed $eax 252; SDAG-NEXT: mulb %sil 253; SDAG-NEXT: seto %cl 254; SDAG-NEXT: movb %al, (%rdx) 255; SDAG-NEXT: movl %ecx, %eax 256; SDAG-NEXT: retq 257; 258; FAST-LABEL: umuloi8: 259; FAST: # %bb.0: 260; FAST-NEXT: movl %edi, %eax 261; FAST-NEXT: # kill: def $al killed $al killed $eax 262; FAST-NEXT: mulb %sil 263; FAST-NEXT: seto %cl 264; FAST-NEXT: movb %al, (%rdx) 265; FAST-NEXT: andb $1, %cl 266; FAST-NEXT: movzbl %cl, %eax 267; FAST-NEXT: retq 268; 269; WIN64-LABEL: umuloi8: 270; WIN64: # %bb.0: 271; WIN64-NEXT: movl %ecx, %eax 272; WIN64-NEXT: mulb %dl 273; WIN64-NEXT: seto %cl 274; WIN64-NEXT: movb %al, (%r8) 275; WIN64-NEXT: movl %ecx, %eax 276; WIN64-NEXT: retq 277; 278; WIN32-LABEL: umuloi8: 279; WIN32: # %bb.0: 280; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 281; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al 282; WIN32-NEXT: mulb {{[0-9]+}}(%esp) 283; WIN32-NEXT: seto %cl 284; WIN32-NEXT: movb %al, (%edx) 285; WIN32-NEXT: movl %ecx, %eax 286; WIN32-NEXT: retl 287 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2) 288 %val = extractvalue {i8, i1} %t, 0 289 %obit = extractvalue {i8, i1} %t, 1 290 store i8 %val, i8* %res 291 ret i1 %obit 292} 293 294define zeroext i1 @umuloi16(i16 %v1, i16 %v2, i16* %res) { 295; SDAG-LABEL: umuloi16: 296; SDAG: # %bb.0: 297; SDAG-NEXT: movq %rdx, %rcx 298; SDAG-NEXT: movl %edi, %eax 299; SDAG-NEXT: # kill: def $ax killed $ax killed $eax 300; SDAG-NEXT: mulw %si 301; SDAG-NEXT: seto %dl 302; SDAG-NEXT: movw %ax, (%rcx) 303; SDAG-NEXT: movl %edx, %eax 304; SDAG-NEXT: retq 305; 306; FAST-LABEL: umuloi16: 307; FAST: # %bb.0: 308; FAST-NEXT: movq %rdx, %rcx 309; FAST-NEXT: movl %edi, %eax 310; FAST-NEXT: # kill: def $ax killed $ax killed $eax 311; FAST-NEXT: mulw %si 312; FAST-NEXT: seto %dl 313; FAST-NEXT: movw %ax, (%rcx) 314; FAST-NEXT: andb $1, %dl 315; FAST-NEXT: movzbl %dl, %eax 316; FAST-NEXT: retq 317; 318; WIN64-LABEL: umuloi16: 319; WIN64: # %bb.0: 320; WIN64-NEXT: movl %ecx, %eax 321; WIN64-NEXT: mulw %dx 322; WIN64-NEXT: seto %cl 323; WIN64-NEXT: movw %ax, (%r8) 324; WIN64-NEXT: movl %ecx, %eax 325; WIN64-NEXT: retq 326; 327; WIN32-LABEL: umuloi16: 328; WIN32: # %bb.0: 329; WIN32-NEXT: pushl %esi 330; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 331; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax 332; WIN32-NEXT: mulw {{[0-9]+}}(%esp) 333; WIN32-NEXT: seto %cl 334; WIN32-NEXT: movw %ax, (%esi) 335; WIN32-NEXT: movl %ecx, %eax 336; WIN32-NEXT: popl %esi 337; WIN32-NEXT: retl 338 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2) 339 %val = extractvalue {i16, i1} %t, 0 340 %obit = extractvalue {i16, i1} %t, 1 341 store i16 %val, i16* %res 342 ret i1 %obit 343} 344 345define zeroext i1 @umuloi32(i32 %v1, i32 %v2, i32* %res) { 346; SDAG-LABEL: umuloi32: 347; SDAG: # %bb.0: 348; SDAG-NEXT: movq %rdx, %rcx 349; SDAG-NEXT: movl %edi, %eax 350; SDAG-NEXT: mull %esi 351; SDAG-NEXT: seto %dl 352; SDAG-NEXT: movl %eax, (%rcx) 353; SDAG-NEXT: movl %edx, %eax 354; SDAG-NEXT: retq 355; 356; FAST-LABEL: umuloi32: 357; FAST: # %bb.0: 358; FAST-NEXT: movq %rdx, %rcx 359; FAST-NEXT: movl %edi, %eax 360; FAST-NEXT: mull %esi 361; FAST-NEXT: seto %dl 362; FAST-NEXT: movl %eax, (%rcx) 363; FAST-NEXT: andb $1, %dl 364; FAST-NEXT: movzbl %dl, %eax 365; FAST-NEXT: retq 366; 367; WIN64-LABEL: umuloi32: 368; WIN64: # %bb.0: 369; WIN64-NEXT: movl %ecx, %eax 370; WIN64-NEXT: mull %edx 371; WIN64-NEXT: seto %cl 372; WIN64-NEXT: movl %eax, (%r8) 373; WIN64-NEXT: movl %ecx, %eax 374; WIN64-NEXT: retq 375; 376; WIN32-LABEL: umuloi32: 377; WIN32: # %bb.0: 378; WIN32-NEXT: pushl %esi 379; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 380; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 381; WIN32-NEXT: mull {{[0-9]+}}(%esp) 382; WIN32-NEXT: seto %cl 383; WIN32-NEXT: movl %eax, (%esi) 384; WIN32-NEXT: movl %ecx, %eax 385; WIN32-NEXT: popl %esi 386; WIN32-NEXT: retl 387 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) 388 %val = extractvalue {i32, i1} %t, 0 389 %obit = extractvalue {i32, i1} %t, 1 390 store i32 %val, i32* %res 391 ret i1 %obit 392} 393 394define zeroext i1 @umuloi64(i64 %v1, i64 %v2, i64* %res) { 395; SDAG-LABEL: umuloi64: 396; SDAG: # %bb.0: 397; SDAG-NEXT: movq %rdx, %rcx 398; SDAG-NEXT: movq %rdi, %rax 399; SDAG-NEXT: mulq %rsi 400; SDAG-NEXT: seto %dl 401; SDAG-NEXT: movq %rax, (%rcx) 402; SDAG-NEXT: movl %edx, %eax 403; SDAG-NEXT: retq 404; 405; FAST-LABEL: umuloi64: 406; FAST: # %bb.0: 407; FAST-NEXT: movq %rdx, %rcx 408; FAST-NEXT: movq %rdi, %rax 409; FAST-NEXT: mulq %rsi 410; FAST-NEXT: seto %dl 411; FAST-NEXT: movq %rax, (%rcx) 412; FAST-NEXT: andb $1, %dl 413; FAST-NEXT: movzbl %dl, %eax 414; FAST-NEXT: retq 415; 416; WIN64-LABEL: umuloi64: 417; WIN64: # %bb.0: 418; WIN64-NEXT: movq %rcx, %rax 419; WIN64-NEXT: mulq %rdx 420; WIN64-NEXT: seto %cl 421; WIN64-NEXT: movq %rax, (%r8) 422; WIN64-NEXT: movl %ecx, %eax 423; WIN64-NEXT: retq 424; 425; WIN32-LABEL: umuloi64: 426; WIN32: # %bb.0: 427; WIN32-NEXT: pushl %ebp 428; WIN32-NEXT: pushl %ebx 429; WIN32-NEXT: pushl %edi 430; WIN32-NEXT: pushl %esi 431; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp 432; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 433; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 434; WIN32-NEXT: testl %esi, %esi 435; WIN32-NEXT: setne %dl 436; WIN32-NEXT: testl %eax, %eax 437; WIN32-NEXT: setne %bl 438; WIN32-NEXT: andb %dl, %bl 439; WIN32-NEXT: mull {{[0-9]+}}(%esp) 440; WIN32-NEXT: movl %eax, %edi 441; WIN32-NEXT: seto %cl 442; WIN32-NEXT: movl %esi, %eax 443; WIN32-NEXT: mull %ebp 444; WIN32-NEXT: movl %eax, %esi 445; WIN32-NEXT: seto %ch 446; WIN32-NEXT: orb %cl, %ch 447; WIN32-NEXT: addl %edi, %esi 448; WIN32-NEXT: movl %ebp, %eax 449; WIN32-NEXT: mull {{[0-9]+}}(%esp) 450; WIN32-NEXT: addl %esi, %edx 451; WIN32-NEXT: setb %cl 452; WIN32-NEXT: orb %ch, %cl 453; WIN32-NEXT: orb %bl, %cl 454; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 455; WIN32-NEXT: movl %eax, (%esi) 456; WIN32-NEXT: movl %edx, 4(%esi) 457; WIN32-NEXT: movl %ecx, %eax 458; WIN32-NEXT: popl %esi 459; WIN32-NEXT: popl %edi 460; WIN32-NEXT: popl %ebx 461; WIN32-NEXT: popl %ebp 462; WIN32-NEXT: retl 463 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) 464 %val = extractvalue {i64, i1} %t, 0 465 %obit = extractvalue {i64, i1} %t, 1 466 store i64 %val, i64* %res 467 ret i1 %obit 468} 469 470; 471; Check the use of the overflow bit in combination with a select instruction. 472; 473define i32 @smuloselecti32(i32 %v1, i32 %v2) { 474; LINUX-LABEL: smuloselecti32: 475; LINUX: # %bb.0: 476; LINUX-NEXT: movl %esi, %eax 477; LINUX-NEXT: movl %edi, %ecx 478; LINUX-NEXT: imull %esi, %ecx 479; LINUX-NEXT: cmovol %edi, %eax 480; LINUX-NEXT: retq 481; 482; WIN64-LABEL: smuloselecti32: 483; WIN64: # %bb.0: 484; WIN64-NEXT: movl %edx, %eax 485; WIN64-NEXT: movl %ecx, %edx 486; WIN64-NEXT: imull %eax, %edx 487; WIN64-NEXT: cmovol %ecx, %eax 488; WIN64-NEXT: retq 489; 490; WIN32-LABEL: smuloselecti32: 491; WIN32: # %bb.0: 492; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 493; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 494; WIN32-NEXT: movl %eax, %edx 495; WIN32-NEXT: imull %ecx, %edx 496; WIN32-NEXT: jo LBB11_2 497; WIN32-NEXT: # %bb.1: 498; WIN32-NEXT: movl %ecx, %eax 499; WIN32-NEXT: LBB11_2: 500; WIN32-NEXT: retl 501 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) 502 %obit = extractvalue {i32, i1} %t, 1 503 %ret = select i1 %obit, i32 %v1, i32 %v2 504 ret i32 %ret 505} 506 507define i64 @smuloselecti64(i64 %v1, i64 %v2) { 508; LINUX-LABEL: smuloselecti64: 509; LINUX: # %bb.0: 510; LINUX-NEXT: movq %rsi, %rax 511; LINUX-NEXT: movq %rdi, %rcx 512; LINUX-NEXT: imulq %rsi, %rcx 513; LINUX-NEXT: cmovoq %rdi, %rax 514; LINUX-NEXT: retq 515; 516; WIN64-LABEL: smuloselecti64: 517; WIN64: # %bb.0: 518; WIN64-NEXT: movq %rdx, %rax 519; WIN64-NEXT: movq %rcx, %rdx 520; WIN64-NEXT: imulq %rax, %rdx 521; WIN64-NEXT: cmovoq %rcx, %rax 522; WIN64-NEXT: retq 523; 524; WIN32-LABEL: smuloselecti64: 525; WIN32: # %bb.0: 526; WIN32-NEXT: pushl %ebp 527; WIN32-NEXT: pushl %ebx 528; WIN32-NEXT: pushl %edi 529; WIN32-NEXT: pushl %esi 530; WIN32-NEXT: pushl %eax 531; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 532; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi 533; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx 534; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp 535; WIN32-NEXT: movl $0, (%esp) 536; WIN32-NEXT: movl %esp, %eax 537; WIN32-NEXT: pushl %eax 538; WIN32-NEXT: pushl %ebp 539; WIN32-NEXT: pushl %ebx 540; WIN32-NEXT: pushl %edi 541; WIN32-NEXT: pushl %esi 542; WIN32-NEXT: calll ___mulodi4 543; WIN32-NEXT: addl $20, %esp 544; WIN32-NEXT: cmpl $0, (%esp) 545; WIN32-NEXT: jne LBB12_2 546; WIN32-NEXT: # %bb.1: 547; WIN32-NEXT: movl %ebx, %esi 548; WIN32-NEXT: movl %ebp, %edi 549; WIN32-NEXT: LBB12_2: 550; WIN32-NEXT: movl %esi, %eax 551; WIN32-NEXT: movl %edi, %edx 552; WIN32-NEXT: addl $4, %esp 553; WIN32-NEXT: popl %esi 554; WIN32-NEXT: popl %edi 555; WIN32-NEXT: popl %ebx 556; WIN32-NEXT: popl %ebp 557; WIN32-NEXT: retl 558 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) 559 %obit = extractvalue {i64, i1} %t, 1 560 %ret = select i1 %obit, i64 %v1, i64 %v2 561 ret i64 %ret 562} 563 564define i32 @umuloselecti32(i32 %v1, i32 %v2) { 565; LINUX-LABEL: umuloselecti32: 566; LINUX: # %bb.0: 567; LINUX-NEXT: movl %edi, %eax 568; LINUX-NEXT: mull %esi 569; LINUX-NEXT: cmovol %edi, %esi 570; LINUX-NEXT: movl %esi, %eax 571; LINUX-NEXT: retq 572; 573; WIN64-LABEL: umuloselecti32: 574; WIN64: # %bb.0: 575; WIN64-NEXT: movl %edx, %r8d 576; WIN64-NEXT: movl %ecx, %eax 577; WIN64-NEXT: mull %edx 578; WIN64-NEXT: cmovol %ecx, %r8d 579; WIN64-NEXT: movl %r8d, %eax 580; WIN64-NEXT: retq 581; 582; WIN32-LABEL: umuloselecti32: 583; WIN32: # %bb.0: 584; WIN32-NEXT: pushl %esi 585; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 586; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 587; WIN32-NEXT: movl %ecx, %eax 588; WIN32-NEXT: mull %esi 589; WIN32-NEXT: jo LBB13_2 590; WIN32-NEXT: # %bb.1: 591; WIN32-NEXT: movl %esi, %ecx 592; WIN32-NEXT: LBB13_2: 593; WIN32-NEXT: movl %ecx, %eax 594; WIN32-NEXT: popl %esi 595; WIN32-NEXT: retl 596 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) 597 %obit = extractvalue {i32, i1} %t, 1 598 %ret = select i1 %obit, i32 %v1, i32 %v2 599 ret i32 %ret 600} 601 602define i64 @umuloselecti64(i64 %v1, i64 %v2) { 603; LINUX-LABEL: umuloselecti64: 604; LINUX: # %bb.0: 605; LINUX-NEXT: movq %rdi, %rax 606; LINUX-NEXT: mulq %rsi 607; LINUX-NEXT: cmovoq %rdi, %rsi 608; LINUX-NEXT: movq %rsi, %rax 609; LINUX-NEXT: retq 610; 611; WIN64-LABEL: umuloselecti64: 612; WIN64: # %bb.0: 613; WIN64-NEXT: movq %rdx, %r8 614; WIN64-NEXT: movq %rcx, %rax 615; WIN64-NEXT: mulq %rdx 616; WIN64-NEXT: cmovoq %rcx, %r8 617; WIN64-NEXT: movq %r8, %rax 618; WIN64-NEXT: retq 619; 620; WIN32-LABEL: umuloselecti64: 621; WIN32: # %bb.0: 622; WIN32-NEXT: pushl %ebp 623; WIN32-NEXT: pushl %ebx 624; WIN32-NEXT: pushl %edi 625; WIN32-NEXT: pushl %esi 626; WIN32-NEXT: pushl %eax 627; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 628; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 629; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp 630; WIN32-NEXT: testl %ebp, %ebp 631; WIN32-NEXT: setne %al 632; WIN32-NEXT: testl %esi, %esi 633; WIN32-NEXT: setne %bl 634; WIN32-NEXT: andb %al, %bl 635; WIN32-NEXT: movl %esi, %eax 636; WIN32-NEXT: mull {{[0-9]+}}(%esp) 637; WIN32-NEXT: movl %eax, %edi 638; WIN32-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill 639; WIN32-NEXT: movl %ebp, %eax 640; WIN32-NEXT: mull %ecx 641; WIN32-NEXT: movl %eax, %ebp 642; WIN32-NEXT: seto %bh 643; WIN32-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload 644; WIN32-NEXT: addl %edi, %ebp 645; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi 646; WIN32-NEXT: movl %ecx, %eax 647; WIN32-NEXT: mull %edi 648; WIN32-NEXT: addl %ebp, %edx 649; WIN32-NEXT: setb %al 650; WIN32-NEXT: orb %bh, %al 651; WIN32-NEXT: orb %bl, %al 652; WIN32-NEXT: testb %al, %al 653; WIN32-NEXT: jne LBB14_2 654; WIN32-NEXT: # %bb.1: 655; WIN32-NEXT: movl %edi, %ecx 656; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 657; WIN32-NEXT: LBB14_2: 658; WIN32-NEXT: movl %ecx, %eax 659; WIN32-NEXT: movl %esi, %edx 660; WIN32-NEXT: addl $4, %esp 661; WIN32-NEXT: popl %esi 662; WIN32-NEXT: popl %edi 663; WIN32-NEXT: popl %ebx 664; WIN32-NEXT: popl %ebp 665; WIN32-NEXT: retl 666 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) 667 %obit = extractvalue {i64, i1} %t, 1 668 %ret = select i1 %obit, i64 %v1, i64 %v2 669 ret i64 %ret 670} 671 672; 673; Check the use of the overflow bit in combination with a branch instruction. 674; 675define zeroext i1 @smulobri8(i8 %v1, i8 %v2) { 676; SDAG-LABEL: smulobri8: 677; SDAG: # %bb.0: 678; SDAG-NEXT: movl %edi, %eax 679; SDAG-NEXT: # kill: def $al killed $al killed $eax 680; SDAG-NEXT: imulb %sil 681; SDAG-NEXT: jo .LBB15_1 682; SDAG-NEXT: # %bb.2: # %continue 683; SDAG-NEXT: movb $1, %al 684; SDAG-NEXT: retq 685; SDAG-NEXT: .LBB15_1: # %overflow 686; SDAG-NEXT: xorl %eax, %eax 687; SDAG-NEXT: retq 688; 689; FAST-LABEL: smulobri8: 690; FAST: # %bb.0: 691; FAST-NEXT: movl %edi, %eax 692; FAST-NEXT: # kill: def $al killed $al killed $eax 693; FAST-NEXT: imulb %sil 694; FAST-NEXT: seto %al 695; FAST-NEXT: testb $1, %al 696; FAST-NEXT: jne .LBB15_1 697; FAST-NEXT: # %bb.2: # %continue 698; FAST-NEXT: movb $1, %al 699; FAST-NEXT: andb $1, %al 700; FAST-NEXT: movzbl %al, %eax 701; FAST-NEXT: retq 702; FAST-NEXT: .LBB15_1: # %overflow 703; FAST-NEXT: xorl %eax, %eax 704; FAST-NEXT: andb $1, %al 705; FAST-NEXT: movzbl %al, %eax 706; FAST-NEXT: retq 707; 708; WIN64-LABEL: smulobri8: 709; WIN64: # %bb.0: 710; WIN64-NEXT: movl %ecx, %eax 711; WIN64-NEXT: imulb %dl 712; WIN64-NEXT: jo .LBB15_1 713; WIN64-NEXT: # %bb.2: # %continue 714; WIN64-NEXT: movb $1, %al 715; WIN64-NEXT: retq 716; WIN64-NEXT: .LBB15_1: # %overflow 717; WIN64-NEXT: xorl %eax, %eax 718; WIN64-NEXT: retq 719; 720; WIN32-LABEL: smulobri8: 721; WIN32: # %bb.0: 722; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al 723; WIN32-NEXT: imulb {{[0-9]+}}(%esp) 724; WIN32-NEXT: jo LBB15_1 725; WIN32-NEXT: # %bb.2: # %continue 726; WIN32-NEXT: movb $1, %al 727; WIN32-NEXT: retl 728; WIN32-NEXT: LBB15_1: # %overflow 729; WIN32-NEXT: xorl %eax, %eax 730; WIN32-NEXT: retl 731 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2) 732 %val = extractvalue {i8, i1} %t, 0 733 %obit = extractvalue {i8, i1} %t, 1 734 br i1 %obit, label %overflow, label %continue, !prof !0 735 736overflow: 737 ret i1 false 738 739continue: 740 ret i1 true 741} 742 743define zeroext i1 @smulobri16(i16 %v1, i16 %v2) { 744; SDAG-LABEL: smulobri16: 745; SDAG: # %bb.0: 746; SDAG-NEXT: imulw %si, %di 747; SDAG-NEXT: jo .LBB16_1 748; SDAG-NEXT: # %bb.2: # %continue 749; SDAG-NEXT: movb $1, %al 750; SDAG-NEXT: retq 751; SDAG-NEXT: .LBB16_1: # %overflow 752; SDAG-NEXT: xorl %eax, %eax 753; SDAG-NEXT: retq 754; 755; FAST-LABEL: smulobri16: 756; FAST: # %bb.0: 757; FAST-NEXT: imulw %si, %di 758; FAST-NEXT: seto %al 759; FAST-NEXT: testb $1, %al 760; FAST-NEXT: jne .LBB16_1 761; FAST-NEXT: # %bb.2: # %continue 762; FAST-NEXT: movb $1, %al 763; FAST-NEXT: andb $1, %al 764; FAST-NEXT: movzbl %al, %eax 765; FAST-NEXT: retq 766; FAST-NEXT: .LBB16_1: # %overflow 767; FAST-NEXT: xorl %eax, %eax 768; FAST-NEXT: andb $1, %al 769; FAST-NEXT: movzbl %al, %eax 770; FAST-NEXT: retq 771; 772; WIN64-LABEL: smulobri16: 773; WIN64: # %bb.0: 774; WIN64-NEXT: imulw %dx, %cx 775; WIN64-NEXT: jo .LBB16_1 776; WIN64-NEXT: # %bb.2: # %continue 777; WIN64-NEXT: movb $1, %al 778; WIN64-NEXT: retq 779; WIN64-NEXT: .LBB16_1: # %overflow 780; WIN64-NEXT: xorl %eax, %eax 781; WIN64-NEXT: retq 782; 783; WIN32-LABEL: smulobri16: 784; WIN32: # %bb.0: 785; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax 786; WIN32-NEXT: imulw {{[0-9]+}}(%esp), %ax 787; WIN32-NEXT: jo LBB16_1 788; WIN32-NEXT: # %bb.2: # %continue 789; WIN32-NEXT: movb $1, %al 790; WIN32-NEXT: retl 791; WIN32-NEXT: LBB16_1: # %overflow 792; WIN32-NEXT: xorl %eax, %eax 793; WIN32-NEXT: retl 794 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2) 795 %val = extractvalue {i16, i1} %t, 0 796 %obit = extractvalue {i16, i1} %t, 1 797 br i1 %obit, label %overflow, label %continue, !prof !0 798 799overflow: 800 ret i1 false 801 802continue: 803 ret i1 true 804} 805 806define zeroext i1 @smulobri32(i32 %v1, i32 %v2) { 807; SDAG-LABEL: smulobri32: 808; SDAG: # %bb.0: 809; SDAG-NEXT: imull %esi, %edi 810; SDAG-NEXT: jo .LBB17_1 811; SDAG-NEXT: # %bb.2: # %continue 812; SDAG-NEXT: movb $1, %al 813; SDAG-NEXT: retq 814; SDAG-NEXT: .LBB17_1: # %overflow 815; SDAG-NEXT: xorl %eax, %eax 816; SDAG-NEXT: retq 817; 818; FAST-LABEL: smulobri32: 819; FAST: # %bb.0: 820; FAST-NEXT: imull %esi, %edi 821; FAST-NEXT: jo .LBB17_1 822; FAST-NEXT: # %bb.2: # %continue 823; FAST-NEXT: movb $1, %al 824; FAST-NEXT: andb $1, %al 825; FAST-NEXT: movzbl %al, %eax 826; FAST-NEXT: retq 827; FAST-NEXT: .LBB17_1: # %overflow 828; FAST-NEXT: xorl %eax, %eax 829; FAST-NEXT: andb $1, %al 830; FAST-NEXT: movzbl %al, %eax 831; FAST-NEXT: retq 832; 833; WIN64-LABEL: smulobri32: 834; WIN64: # %bb.0: 835; WIN64-NEXT: imull %edx, %ecx 836; WIN64-NEXT: jo .LBB17_1 837; WIN64-NEXT: # %bb.2: # %continue 838; WIN64-NEXT: movb $1, %al 839; WIN64-NEXT: retq 840; WIN64-NEXT: .LBB17_1: # %overflow 841; WIN64-NEXT: xorl %eax, %eax 842; WIN64-NEXT: retq 843; 844; WIN32-LABEL: smulobri32: 845; WIN32: # %bb.0: 846; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 847; WIN32-NEXT: imull {{[0-9]+}}(%esp), %eax 848; WIN32-NEXT: jo LBB17_1 849; WIN32-NEXT: # %bb.2: # %continue 850; WIN32-NEXT: movb $1, %al 851; WIN32-NEXT: retl 852; WIN32-NEXT: LBB17_1: # %overflow 853; WIN32-NEXT: xorl %eax, %eax 854; WIN32-NEXT: retl 855 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) 856 %val = extractvalue {i32, i1} %t, 0 857 %obit = extractvalue {i32, i1} %t, 1 858 br i1 %obit, label %overflow, label %continue, !prof !0 859 860overflow: 861 ret i1 false 862 863continue: 864 ret i1 true 865} 866 867define zeroext i1 @smulobri64(i64 %v1, i64 %v2) { 868; SDAG-LABEL: smulobri64: 869; SDAG: # %bb.0: 870; SDAG-NEXT: imulq %rsi, %rdi 871; SDAG-NEXT: jo .LBB18_1 872; SDAG-NEXT: # %bb.2: # %continue 873; SDAG-NEXT: movb $1, %al 874; SDAG-NEXT: retq 875; SDAG-NEXT: .LBB18_1: # %overflow 876; SDAG-NEXT: xorl %eax, %eax 877; SDAG-NEXT: retq 878; 879; FAST-LABEL: smulobri64: 880; FAST: # %bb.0: 881; FAST-NEXT: imulq %rsi, %rdi 882; FAST-NEXT: jo .LBB18_1 883; FAST-NEXT: # %bb.2: # %continue 884; FAST-NEXT: movb $1, %al 885; FAST-NEXT: andb $1, %al 886; FAST-NEXT: movzbl %al, %eax 887; FAST-NEXT: retq 888; FAST-NEXT: .LBB18_1: # %overflow 889; FAST-NEXT: xorl %eax, %eax 890; FAST-NEXT: andb $1, %al 891; FAST-NEXT: movzbl %al, %eax 892; FAST-NEXT: retq 893; 894; WIN64-LABEL: smulobri64: 895; WIN64: # %bb.0: 896; WIN64-NEXT: imulq %rdx, %rcx 897; WIN64-NEXT: jo .LBB18_1 898; WIN64-NEXT: # %bb.2: # %continue 899; WIN64-NEXT: movb $1, %al 900; WIN64-NEXT: retq 901; WIN64-NEXT: .LBB18_1: # %overflow 902; WIN64-NEXT: xorl %eax, %eax 903; WIN64-NEXT: retq 904; 905; WIN32-LABEL: smulobri64: 906; WIN32: # %bb.0: 907; WIN32-NEXT: pushl %edi 908; WIN32-NEXT: pushl %esi 909; WIN32-NEXT: pushl %eax 910; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 911; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 912; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 913; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 914; WIN32-NEXT: movl $0, (%esp) 915; WIN32-NEXT: movl %esp, %edi 916; WIN32-NEXT: pushl %edi 917; WIN32-NEXT: pushl %esi 918; WIN32-NEXT: pushl %edx 919; WIN32-NEXT: pushl %ecx 920; WIN32-NEXT: pushl %eax 921; WIN32-NEXT: calll ___mulodi4 922; WIN32-NEXT: addl $20, %esp 923; WIN32-NEXT: cmpl $0, (%esp) 924; WIN32-NEXT: jne LBB18_1 925; WIN32-NEXT: # %bb.3: # %continue 926; WIN32-NEXT: movb $1, %al 927; WIN32-NEXT: LBB18_2: # %overflow 928; WIN32-NEXT: addl $4, %esp 929; WIN32-NEXT: popl %esi 930; WIN32-NEXT: popl %edi 931; WIN32-NEXT: retl 932; WIN32-NEXT: LBB18_1: # %overflow 933; WIN32-NEXT: xorl %eax, %eax 934; WIN32-NEXT: jmp LBB18_2 935 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) 936 %val = extractvalue {i64, i1} %t, 0 937 %obit = extractvalue {i64, i1} %t, 1 938 br i1 %obit, label %overflow, label %continue, !prof !0 939 940overflow: 941 ret i1 false 942 943continue: 944 ret i1 true 945} 946 947define zeroext i1 @umulobri8(i8 %v1, i8 %v2) { 948; SDAG-LABEL: umulobri8: 949; SDAG: # %bb.0: 950; SDAG-NEXT: movl %edi, %eax 951; SDAG-NEXT: # kill: def $al killed $al killed $eax 952; SDAG-NEXT: mulb %sil 953; SDAG-NEXT: jo .LBB19_1 954; SDAG-NEXT: # %bb.2: # %continue 955; SDAG-NEXT: movb $1, %al 956; SDAG-NEXT: retq 957; SDAG-NEXT: .LBB19_1: # %overflow 958; SDAG-NEXT: xorl %eax, %eax 959; SDAG-NEXT: retq 960; 961; FAST-LABEL: umulobri8: 962; FAST: # %bb.0: 963; FAST-NEXT: movl %edi, %eax 964; FAST-NEXT: # kill: def $al killed $al killed $eax 965; FAST-NEXT: mulb %sil 966; FAST-NEXT: seto %al 967; FAST-NEXT: testb $1, %al 968; FAST-NEXT: jne .LBB19_1 969; FAST-NEXT: # %bb.2: # %continue 970; FAST-NEXT: movb $1, %al 971; FAST-NEXT: andb $1, %al 972; FAST-NEXT: movzbl %al, %eax 973; FAST-NEXT: retq 974; FAST-NEXT: .LBB19_1: # %overflow 975; FAST-NEXT: xorl %eax, %eax 976; FAST-NEXT: andb $1, %al 977; FAST-NEXT: movzbl %al, %eax 978; FAST-NEXT: retq 979; 980; WIN64-LABEL: umulobri8: 981; WIN64: # %bb.0: 982; WIN64-NEXT: movl %ecx, %eax 983; WIN64-NEXT: mulb %dl 984; WIN64-NEXT: jo .LBB19_1 985; WIN64-NEXT: # %bb.2: # %continue 986; WIN64-NEXT: movb $1, %al 987; WIN64-NEXT: retq 988; WIN64-NEXT: .LBB19_1: # %overflow 989; WIN64-NEXT: xorl %eax, %eax 990; WIN64-NEXT: retq 991; 992; WIN32-LABEL: umulobri8: 993; WIN32: # %bb.0: 994; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al 995; WIN32-NEXT: mulb {{[0-9]+}}(%esp) 996; WIN32-NEXT: jo LBB19_1 997; WIN32-NEXT: # %bb.2: # %continue 998; WIN32-NEXT: movb $1, %al 999; WIN32-NEXT: retl 1000; WIN32-NEXT: LBB19_1: # %overflow 1001; WIN32-NEXT: xorl %eax, %eax 1002; WIN32-NEXT: retl 1003 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2) 1004 %val = extractvalue {i8, i1} %t, 0 1005 %obit = extractvalue {i8, i1} %t, 1 1006 br i1 %obit, label %overflow, label %continue, !prof !0 1007 1008overflow: 1009 ret i1 false 1010 1011continue: 1012 ret i1 true 1013} 1014 1015define zeroext i1 @umulobri16(i16 %v1, i16 %v2) { 1016; SDAG-LABEL: umulobri16: 1017; SDAG: # %bb.0: 1018; SDAG-NEXT: movl %edi, %eax 1019; SDAG-NEXT: # kill: def $ax killed $ax killed $eax 1020; SDAG-NEXT: mulw %si 1021; SDAG-NEXT: jo .LBB20_1 1022; SDAG-NEXT: # %bb.2: # %continue 1023; SDAG-NEXT: movb $1, %al 1024; SDAG-NEXT: retq 1025; SDAG-NEXT: .LBB20_1: # %overflow 1026; SDAG-NEXT: xorl %eax, %eax 1027; SDAG-NEXT: retq 1028; 1029; FAST-LABEL: umulobri16: 1030; FAST: # %bb.0: 1031; FAST-NEXT: movl %edi, %eax 1032; FAST-NEXT: # kill: def $ax killed $ax killed $eax 1033; FAST-NEXT: mulw %si 1034; FAST-NEXT: seto %al 1035; FAST-NEXT: testb $1, %al 1036; FAST-NEXT: jne .LBB20_1 1037; FAST-NEXT: # %bb.2: # %continue 1038; FAST-NEXT: movb $1, %al 1039; FAST-NEXT: andb $1, %al 1040; FAST-NEXT: movzbl %al, %eax 1041; FAST-NEXT: retq 1042; FAST-NEXT: .LBB20_1: # %overflow 1043; FAST-NEXT: xorl %eax, %eax 1044; FAST-NEXT: andb $1, %al 1045; FAST-NEXT: movzbl %al, %eax 1046; FAST-NEXT: retq 1047; 1048; WIN64-LABEL: umulobri16: 1049; WIN64: # %bb.0: 1050; WIN64-NEXT: movl %ecx, %eax 1051; WIN64-NEXT: mulw %dx 1052; WIN64-NEXT: jo .LBB20_1 1053; WIN64-NEXT: # %bb.2: # %continue 1054; WIN64-NEXT: movb $1, %al 1055; WIN64-NEXT: retq 1056; WIN64-NEXT: .LBB20_1: # %overflow 1057; WIN64-NEXT: xorl %eax, %eax 1058; WIN64-NEXT: retq 1059; 1060; WIN32-LABEL: umulobri16: 1061; WIN32: # %bb.0: 1062; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1063; WIN32-NEXT: mulw {{[0-9]+}}(%esp) 1064; WIN32-NEXT: jo LBB20_1 1065; WIN32-NEXT: # %bb.2: # %continue 1066; WIN32-NEXT: movb $1, %al 1067; WIN32-NEXT: retl 1068; WIN32-NEXT: LBB20_1: # %overflow 1069; WIN32-NEXT: xorl %eax, %eax 1070; WIN32-NEXT: retl 1071 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2) 1072 %val = extractvalue {i16, i1} %t, 0 1073 %obit = extractvalue {i16, i1} %t, 1 1074 br i1 %obit, label %overflow, label %continue, !prof !0 1075 1076overflow: 1077 ret i1 false 1078 1079continue: 1080 ret i1 true 1081} 1082 1083define zeroext i1 @umulobri32(i32 %v1, i32 %v2) { 1084; SDAG-LABEL: umulobri32: 1085; SDAG: # %bb.0: 1086; SDAG-NEXT: movl %edi, %eax 1087; SDAG-NEXT: mull %esi 1088; SDAG-NEXT: jo .LBB21_1 1089; SDAG-NEXT: # %bb.2: # %continue 1090; SDAG-NEXT: movb $1, %al 1091; SDAG-NEXT: retq 1092; SDAG-NEXT: .LBB21_1: # %overflow 1093; SDAG-NEXT: xorl %eax, %eax 1094; SDAG-NEXT: retq 1095; 1096; FAST-LABEL: umulobri32: 1097; FAST: # %bb.0: 1098; FAST-NEXT: movl %edi, %eax 1099; FAST-NEXT: mull %esi 1100; FAST-NEXT: jo .LBB21_1 1101; FAST-NEXT: # %bb.2: # %continue 1102; FAST-NEXT: movb $1, %al 1103; FAST-NEXT: andb $1, %al 1104; FAST-NEXT: movzbl %al, %eax 1105; FAST-NEXT: retq 1106; FAST-NEXT: .LBB21_1: # %overflow 1107; FAST-NEXT: xorl %eax, %eax 1108; FAST-NEXT: andb $1, %al 1109; FAST-NEXT: movzbl %al, %eax 1110; FAST-NEXT: retq 1111; 1112; WIN64-LABEL: umulobri32: 1113; WIN64: # %bb.0: 1114; WIN64-NEXT: movl %ecx, %eax 1115; WIN64-NEXT: mull %edx 1116; WIN64-NEXT: jo .LBB21_1 1117; WIN64-NEXT: # %bb.2: # %continue 1118; WIN64-NEXT: movb $1, %al 1119; WIN64-NEXT: retq 1120; WIN64-NEXT: .LBB21_1: # %overflow 1121; WIN64-NEXT: xorl %eax, %eax 1122; WIN64-NEXT: retq 1123; 1124; WIN32-LABEL: umulobri32: 1125; WIN32: # %bb.0: 1126; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1127; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1128; WIN32-NEXT: jo LBB21_1 1129; WIN32-NEXT: # %bb.2: # %continue 1130; WIN32-NEXT: movb $1, %al 1131; WIN32-NEXT: retl 1132; WIN32-NEXT: LBB21_1: # %overflow 1133; WIN32-NEXT: xorl %eax, %eax 1134; WIN32-NEXT: retl 1135 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) 1136 %val = extractvalue {i32, i1} %t, 0 1137 %obit = extractvalue {i32, i1} %t, 1 1138 br i1 %obit, label %overflow, label %continue, !prof !0 1139 1140overflow: 1141 ret i1 false 1142 1143continue: 1144 ret i1 true 1145} 1146 1147define zeroext i1 @umulobri64(i64 %v1, i64 %v2) { 1148; SDAG-LABEL: umulobri64: 1149; SDAG: # %bb.0: 1150; SDAG-NEXT: movq %rdi, %rax 1151; SDAG-NEXT: mulq %rsi 1152; SDAG-NEXT: jo .LBB22_1 1153; SDAG-NEXT: # %bb.2: # %continue 1154; SDAG-NEXT: movb $1, %al 1155; SDAG-NEXT: retq 1156; SDAG-NEXT: .LBB22_1: # %overflow 1157; SDAG-NEXT: xorl %eax, %eax 1158; SDAG-NEXT: retq 1159; 1160; FAST-LABEL: umulobri64: 1161; FAST: # %bb.0: 1162; FAST-NEXT: movq %rdi, %rax 1163; FAST-NEXT: mulq %rsi 1164; FAST-NEXT: jo .LBB22_1 1165; FAST-NEXT: # %bb.2: # %continue 1166; FAST-NEXT: movb $1, %al 1167; FAST-NEXT: andb $1, %al 1168; FAST-NEXT: movzbl %al, %eax 1169; FAST-NEXT: retq 1170; FAST-NEXT: .LBB22_1: # %overflow 1171; FAST-NEXT: xorl %eax, %eax 1172; FAST-NEXT: andb $1, %al 1173; FAST-NEXT: movzbl %al, %eax 1174; FAST-NEXT: retq 1175; 1176; WIN64-LABEL: umulobri64: 1177; WIN64: # %bb.0: 1178; WIN64-NEXT: movq %rcx, %rax 1179; WIN64-NEXT: mulq %rdx 1180; WIN64-NEXT: jo .LBB22_1 1181; WIN64-NEXT: # %bb.2: # %continue 1182; WIN64-NEXT: movb $1, %al 1183; WIN64-NEXT: retq 1184; WIN64-NEXT: .LBB22_1: # %overflow 1185; WIN64-NEXT: xorl %eax, %eax 1186; WIN64-NEXT: retq 1187; 1188; WIN32-LABEL: umulobri64: 1189; WIN32: # %bb.0: 1190; WIN32-NEXT: pushl %ebp 1191; WIN32-NEXT: pushl %ebx 1192; WIN32-NEXT: pushl %edi 1193; WIN32-NEXT: pushl %esi 1194; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp 1195; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1196; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 1197; WIN32-NEXT: testl %esi, %esi 1198; WIN32-NEXT: setne %dl 1199; WIN32-NEXT: testl %eax, %eax 1200; WIN32-NEXT: setne %bl 1201; WIN32-NEXT: andb %dl, %bl 1202; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1203; WIN32-NEXT: movl %eax, %edi 1204; WIN32-NEXT: seto %bh 1205; WIN32-NEXT: movl %esi, %eax 1206; WIN32-NEXT: mull %ebp 1207; WIN32-NEXT: movl %eax, %esi 1208; WIN32-NEXT: seto %cl 1209; WIN32-NEXT: orb %bh, %cl 1210; WIN32-NEXT: addl %edi, %esi 1211; WIN32-NEXT: movl %ebp, %eax 1212; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1213; WIN32-NEXT: addl %esi, %edx 1214; WIN32-NEXT: setb %al 1215; WIN32-NEXT: orb %cl, %al 1216; WIN32-NEXT: orb %bl, %al 1217; WIN32-NEXT: subb $1, %al 1218; WIN32-NEXT: je LBB22_1 1219; WIN32-NEXT: # %bb.3: # %continue 1220; WIN32-NEXT: movb $1, %al 1221; WIN32-NEXT: LBB22_2: # %overflow 1222; WIN32-NEXT: popl %esi 1223; WIN32-NEXT: popl %edi 1224; WIN32-NEXT: popl %ebx 1225; WIN32-NEXT: popl %ebp 1226; WIN32-NEXT: retl 1227; WIN32-NEXT: LBB22_1: # %overflow 1228; WIN32-NEXT: xorl %eax, %eax 1229; WIN32-NEXT: jmp LBB22_2 1230 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) 1231 %val = extractvalue {i64, i1} %t, 0 1232 %obit = extractvalue {i64, i1} %t, 1 1233 br i1 %obit, label %overflow, label %continue, !prof !0 1234 1235overflow: 1236 ret i1 false 1237 1238continue: 1239 ret i1 true 1240} 1241 1242define i1 @bug27873(i64 %c1, i1 %c2) { 1243; LINUX-LABEL: bug27873: 1244; LINUX: # %bb.0: 1245; LINUX-NEXT: movq %rdi, %rax 1246; LINUX-NEXT: movl $160, %ecx 1247; LINUX-NEXT: mulq %rcx 1248; LINUX-NEXT: seto %al 1249; LINUX-NEXT: orb %sil, %al 1250; LINUX-NEXT: retq 1251; 1252; WIN64-LABEL: bug27873: 1253; WIN64: # %bb.0: 1254; WIN64-NEXT: movl %edx, %r8d 1255; WIN64-NEXT: movq %rcx, %rax 1256; WIN64-NEXT: movl $160, %ecx 1257; WIN64-NEXT: mulq %rcx 1258; WIN64-NEXT: seto %al 1259; WIN64-NEXT: orb %r8b, %al 1260; WIN64-NEXT: retq 1261; 1262; WIN32-LABEL: bug27873: 1263; WIN32: # %bb.0: 1264; WIN32-NEXT: pushl %ebx 1265; WIN32-NEXT: movl $160, %eax 1266; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1267; WIN32-NEXT: movl %eax, %ecx 1268; WIN32-NEXT: seto %bl 1269; WIN32-NEXT: movl $160, %eax 1270; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1271; WIN32-NEXT: addl %ecx, %edx 1272; WIN32-NEXT: setb %al 1273; WIN32-NEXT: orb %bl, %al 1274; WIN32-NEXT: orb {{[0-9]+}}(%esp), %al 1275; WIN32-NEXT: popl %ebx 1276; WIN32-NEXT: retl 1277 %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160) 1278 %mul.overflow = extractvalue { i64, i1 } %mul, 1 1279 %x1 = or i1 %c2, %mul.overflow 1280 ret i1 %x1 1281} 1282 1283define zeroext i1 @smuloi8_load(i8* %ptr1, i8 %v2, i8* %res) { 1284; SDAG-LABEL: smuloi8_load: 1285; SDAG: # %bb.0: 1286; SDAG-NEXT: movl %esi, %eax 1287; SDAG-NEXT: # kill: def $al killed $al killed $eax 1288; SDAG-NEXT: imulb (%rdi) 1289; SDAG-NEXT: seto %cl 1290; SDAG-NEXT: movb %al, (%rdx) 1291; SDAG-NEXT: movl %ecx, %eax 1292; SDAG-NEXT: retq 1293; 1294; FAST-LABEL: smuloi8_load: 1295; FAST: # %bb.0: 1296; FAST-NEXT: movb (%rdi), %al 1297; FAST-NEXT: imulb %sil 1298; FAST-NEXT: seto %cl 1299; FAST-NEXT: movb %al, (%rdx) 1300; FAST-NEXT: andb $1, %cl 1301; FAST-NEXT: movzbl %cl, %eax 1302; FAST-NEXT: retq 1303; 1304; WIN64-LABEL: smuloi8_load: 1305; WIN64: # %bb.0: 1306; WIN64-NEXT: movl %edx, %eax 1307; WIN64-NEXT: imulb (%rcx) 1308; WIN64-NEXT: seto %cl 1309; WIN64-NEXT: movb %al, (%r8) 1310; WIN64-NEXT: movl %ecx, %eax 1311; WIN64-NEXT: retq 1312; 1313; WIN32-LABEL: smuloi8_load: 1314; WIN32: # %bb.0: 1315; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1316; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1317; WIN32-NEXT: movb (%eax), %al 1318; WIN32-NEXT: imulb {{[0-9]+}}(%esp) 1319; WIN32-NEXT: seto %cl 1320; WIN32-NEXT: movb %al, (%edx) 1321; WIN32-NEXT: movl %ecx, %eax 1322; WIN32-NEXT: retl 1323 %v1 = load i8, i8* %ptr1 1324 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2) 1325 %val = extractvalue {i8, i1} %t, 0 1326 %obit = extractvalue {i8, i1} %t, 1 1327 store i8 %val, i8* %res 1328 ret i1 %obit 1329} 1330 1331define zeroext i1 @smuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) { 1332; SDAG-LABEL: smuloi8_load2: 1333; SDAG: # %bb.0: 1334; SDAG-NEXT: movl %edi, %eax 1335; SDAG-NEXT: # kill: def $al killed $al killed $eax 1336; SDAG-NEXT: imulb (%rsi) 1337; SDAG-NEXT: seto %cl 1338; SDAG-NEXT: movb %al, (%rdx) 1339; SDAG-NEXT: movl %ecx, %eax 1340; SDAG-NEXT: retq 1341; 1342; FAST-LABEL: smuloi8_load2: 1343; FAST: # %bb.0: 1344; FAST-NEXT: movl %edi, %eax 1345; FAST-NEXT: # kill: def $al killed $al killed $eax 1346; FAST-NEXT: imulb (%rsi) 1347; FAST-NEXT: seto %cl 1348; FAST-NEXT: movb %al, (%rdx) 1349; FAST-NEXT: andb $1, %cl 1350; FAST-NEXT: movzbl %cl, %eax 1351; FAST-NEXT: retq 1352; 1353; WIN64-LABEL: smuloi8_load2: 1354; WIN64: # %bb.0: 1355; WIN64-NEXT: movl %ecx, %eax 1356; WIN64-NEXT: imulb (%rdx) 1357; WIN64-NEXT: seto %cl 1358; WIN64-NEXT: movb %al, (%r8) 1359; WIN64-NEXT: movl %ecx, %eax 1360; WIN64-NEXT: retq 1361; 1362; WIN32-LABEL: smuloi8_load2: 1363; WIN32: # %bb.0: 1364; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1365; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al 1366; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1367; WIN32-NEXT: imulb (%ecx) 1368; WIN32-NEXT: seto %cl 1369; WIN32-NEXT: movb %al, (%edx) 1370; WIN32-NEXT: movl %ecx, %eax 1371; WIN32-NEXT: retl 1372 %v2 = load i8, i8* %ptr2 1373 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2) 1374 %val = extractvalue {i8, i1} %t, 0 1375 %obit = extractvalue {i8, i1} %t, 1 1376 store i8 %val, i8* %res 1377 ret i1 %obit 1378} 1379 1380define zeroext i1 @smuloi16_load(i16* %ptr1, i16 %v2, i16* %res) { 1381; SDAG-LABEL: smuloi16_load: 1382; SDAG: # %bb.0: 1383; SDAG-NEXT: imulw (%rdi), %si 1384; SDAG-NEXT: seto %al 1385; SDAG-NEXT: movw %si, (%rdx) 1386; SDAG-NEXT: retq 1387; 1388; FAST-LABEL: smuloi16_load: 1389; FAST: # %bb.0: 1390; FAST-NEXT: imulw (%rdi), %si 1391; FAST-NEXT: seto %al 1392; FAST-NEXT: movw %si, (%rdx) 1393; FAST-NEXT: andb $1, %al 1394; FAST-NEXT: movzbl %al, %eax 1395; FAST-NEXT: retq 1396; 1397; WIN64-LABEL: smuloi16_load: 1398; WIN64: # %bb.0: 1399; WIN64-NEXT: imulw (%rcx), %dx 1400; WIN64-NEXT: seto %al 1401; WIN64-NEXT: movw %dx, (%r8) 1402; WIN64-NEXT: retq 1403; 1404; WIN32-LABEL: smuloi16_load: 1405; WIN32: # %bb.0: 1406; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1407; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1408; WIN32-NEXT: movzwl (%eax), %edx 1409; WIN32-NEXT: imulw {{[0-9]+}}(%esp), %dx 1410; WIN32-NEXT: seto %al 1411; WIN32-NEXT: movw %dx, (%ecx) 1412; WIN32-NEXT: retl 1413 %v1 = load i16, i16* %ptr1 1414 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2) 1415 %val = extractvalue {i16, i1} %t, 0 1416 %obit = extractvalue {i16, i1} %t, 1 1417 store i16 %val, i16* %res 1418 ret i1 %obit 1419} 1420 1421define zeroext i1 @smuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) { 1422; SDAG-LABEL: smuloi16_load2: 1423; SDAG: # %bb.0: 1424; SDAG-NEXT: imulw (%rsi), %di 1425; SDAG-NEXT: seto %al 1426; SDAG-NEXT: movw %di, (%rdx) 1427; SDAG-NEXT: retq 1428; 1429; FAST-LABEL: smuloi16_load2: 1430; FAST: # %bb.0: 1431; FAST-NEXT: imulw (%rsi), %di 1432; FAST-NEXT: seto %al 1433; FAST-NEXT: movw %di, (%rdx) 1434; FAST-NEXT: andb $1, %al 1435; FAST-NEXT: movzbl %al, %eax 1436; FAST-NEXT: retq 1437; 1438; WIN64-LABEL: smuloi16_load2: 1439; WIN64: # %bb.0: 1440; WIN64-NEXT: imulw (%rdx), %cx 1441; WIN64-NEXT: seto %al 1442; WIN64-NEXT: movw %cx, (%r8) 1443; WIN64-NEXT: retq 1444; 1445; WIN32-LABEL: smuloi16_load2: 1446; WIN32: # %bb.0: 1447; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1448; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1449; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %edx 1450; WIN32-NEXT: imulw (%eax), %dx 1451; WIN32-NEXT: seto %al 1452; WIN32-NEXT: movw %dx, (%ecx) 1453; WIN32-NEXT: retl 1454 %v2 = load i16, i16* %ptr2 1455 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2) 1456 %val = extractvalue {i16, i1} %t, 0 1457 %obit = extractvalue {i16, i1} %t, 1 1458 store i16 %val, i16* %res 1459 ret i1 %obit 1460} 1461 1462define zeroext i1 @smuloi32_load(i32* %ptr1, i32 %v2, i32* %res) { 1463; SDAG-LABEL: smuloi32_load: 1464; SDAG: # %bb.0: 1465; SDAG-NEXT: imull (%rdi), %esi 1466; SDAG-NEXT: seto %al 1467; SDAG-NEXT: movl %esi, (%rdx) 1468; SDAG-NEXT: retq 1469; 1470; FAST-LABEL: smuloi32_load: 1471; FAST: # %bb.0: 1472; FAST-NEXT: imull (%rdi), %esi 1473; FAST-NEXT: seto %al 1474; FAST-NEXT: movl %esi, (%rdx) 1475; FAST-NEXT: andb $1, %al 1476; FAST-NEXT: movzbl %al, %eax 1477; FAST-NEXT: retq 1478; 1479; WIN64-LABEL: smuloi32_load: 1480; WIN64: # %bb.0: 1481; WIN64-NEXT: imull (%rcx), %edx 1482; WIN64-NEXT: seto %al 1483; WIN64-NEXT: movl %edx, (%r8) 1484; WIN64-NEXT: retq 1485; 1486; WIN32-LABEL: smuloi32_load: 1487; WIN32: # %bb.0: 1488; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1489; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1490; WIN32-NEXT: movl (%eax), %edx 1491; WIN32-NEXT: imull {{[0-9]+}}(%esp), %edx 1492; WIN32-NEXT: seto %al 1493; WIN32-NEXT: movl %edx, (%ecx) 1494; WIN32-NEXT: retl 1495 %v1 = load i32, i32* %ptr1 1496 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) 1497 %val = extractvalue {i32, i1} %t, 0 1498 %obit = extractvalue {i32, i1} %t, 1 1499 store i32 %val, i32* %res 1500 ret i1 %obit 1501} 1502 1503define zeroext i1 @smuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) { 1504; SDAG-LABEL: smuloi32_load2: 1505; SDAG: # %bb.0: 1506; SDAG-NEXT: imull (%rsi), %edi 1507; SDAG-NEXT: seto %al 1508; SDAG-NEXT: movl %edi, (%rdx) 1509; SDAG-NEXT: retq 1510; 1511; FAST-LABEL: smuloi32_load2: 1512; FAST: # %bb.0: 1513; FAST-NEXT: imull (%rsi), %edi 1514; FAST-NEXT: seto %al 1515; FAST-NEXT: movl %edi, (%rdx) 1516; FAST-NEXT: andb $1, %al 1517; FAST-NEXT: movzbl %al, %eax 1518; FAST-NEXT: retq 1519; 1520; WIN64-LABEL: smuloi32_load2: 1521; WIN64: # %bb.0: 1522; WIN64-NEXT: imull (%rdx), %ecx 1523; WIN64-NEXT: seto %al 1524; WIN64-NEXT: movl %ecx, (%r8) 1525; WIN64-NEXT: retq 1526; 1527; WIN32-LABEL: smuloi32_load2: 1528; WIN32: # %bb.0: 1529; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1530; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1531; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1532; WIN32-NEXT: imull (%eax), %edx 1533; WIN32-NEXT: seto %al 1534; WIN32-NEXT: movl %edx, (%ecx) 1535; WIN32-NEXT: retl 1536 %v2 = load i32, i32* %ptr2 1537 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) 1538 %val = extractvalue {i32, i1} %t, 0 1539 %obit = extractvalue {i32, i1} %t, 1 1540 store i32 %val, i32* %res 1541 ret i1 %obit 1542} 1543 1544define zeroext i1 @smuloi64_load(i64* %ptr1, i64 %v2, i64* %res) { 1545; SDAG-LABEL: smuloi64_load: 1546; SDAG: # %bb.0: 1547; SDAG-NEXT: imulq (%rdi), %rsi 1548; SDAG-NEXT: seto %al 1549; SDAG-NEXT: movq %rsi, (%rdx) 1550; SDAG-NEXT: retq 1551; 1552; FAST-LABEL: smuloi64_load: 1553; FAST: # %bb.0: 1554; FAST-NEXT: imulq (%rdi), %rsi 1555; FAST-NEXT: seto %al 1556; FAST-NEXT: movq %rsi, (%rdx) 1557; FAST-NEXT: andb $1, %al 1558; FAST-NEXT: movzbl %al, %eax 1559; FAST-NEXT: retq 1560; 1561; WIN64-LABEL: smuloi64_load: 1562; WIN64: # %bb.0: 1563; WIN64-NEXT: imulq (%rcx), %rdx 1564; WIN64-NEXT: seto %al 1565; WIN64-NEXT: movq %rdx, (%r8) 1566; WIN64-NEXT: retq 1567; 1568; WIN32-LABEL: smuloi64_load: 1569; WIN32: # %bb.0: 1570; WIN32-NEXT: pushl %ebx 1571; WIN32-NEXT: pushl %edi 1572; WIN32-NEXT: pushl %esi 1573; WIN32-NEXT: pushl %eax 1574; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 1575; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1576; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1577; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1578; WIN32-NEXT: movl (%edx), %edi 1579; WIN32-NEXT: movl 4(%edx), %edx 1580; WIN32-NEXT: movl $0, (%esp) 1581; WIN32-NEXT: movl %esp, %ebx 1582; WIN32-NEXT: pushl %ebx 1583; WIN32-NEXT: pushl %ecx 1584; WIN32-NEXT: pushl %eax 1585; WIN32-NEXT: pushl %edx 1586; WIN32-NEXT: pushl %edi 1587; WIN32-NEXT: calll ___mulodi4 1588; WIN32-NEXT: addl $20, %esp 1589; WIN32-NEXT: cmpl $0, (%esp) 1590; WIN32-NEXT: setne %cl 1591; WIN32-NEXT: movl %eax, (%esi) 1592; WIN32-NEXT: movl %edx, 4(%esi) 1593; WIN32-NEXT: movl %ecx, %eax 1594; WIN32-NEXT: addl $4, %esp 1595; WIN32-NEXT: popl %esi 1596; WIN32-NEXT: popl %edi 1597; WIN32-NEXT: popl %ebx 1598; WIN32-NEXT: retl 1599 %v1 = load i64, i64* %ptr1 1600 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) 1601 %val = extractvalue {i64, i1} %t, 0 1602 %obit = extractvalue {i64, i1} %t, 1 1603 store i64 %val, i64* %res 1604 ret i1 %obit 1605} 1606 1607define zeroext i1 @smuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) { 1608; SDAG-LABEL: smuloi64_load2: 1609; SDAG: # %bb.0: 1610; SDAG-NEXT: imulq (%rsi), %rdi 1611; SDAG-NEXT: seto %al 1612; SDAG-NEXT: movq %rdi, (%rdx) 1613; SDAG-NEXT: retq 1614; 1615; FAST-LABEL: smuloi64_load2: 1616; FAST: # %bb.0: 1617; FAST-NEXT: imulq (%rsi), %rdi 1618; FAST-NEXT: seto %al 1619; FAST-NEXT: movq %rdi, (%rdx) 1620; FAST-NEXT: andb $1, %al 1621; FAST-NEXT: movzbl %al, %eax 1622; FAST-NEXT: retq 1623; 1624; WIN64-LABEL: smuloi64_load2: 1625; WIN64: # %bb.0: 1626; WIN64-NEXT: imulq (%rdx), %rcx 1627; WIN64-NEXT: seto %al 1628; WIN64-NEXT: movq %rcx, (%r8) 1629; WIN64-NEXT: retq 1630; 1631; WIN32-LABEL: smuloi64_load2: 1632; WIN32: # %bb.0: 1633; WIN32-NEXT: pushl %ebx 1634; WIN32-NEXT: pushl %edi 1635; WIN32-NEXT: pushl %esi 1636; WIN32-NEXT: pushl %eax 1637; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 1638; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1639; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1640; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1641; WIN32-NEXT: movl (%edx), %edi 1642; WIN32-NEXT: movl 4(%edx), %edx 1643; WIN32-NEXT: movl $0, (%esp) 1644; WIN32-NEXT: movl %esp, %ebx 1645; WIN32-NEXT: pushl %ebx 1646; WIN32-NEXT: pushl %edx 1647; WIN32-NEXT: pushl %edi 1648; WIN32-NEXT: pushl %ecx 1649; WIN32-NEXT: pushl %eax 1650; WIN32-NEXT: calll ___mulodi4 1651; WIN32-NEXT: addl $20, %esp 1652; WIN32-NEXT: cmpl $0, (%esp) 1653; WIN32-NEXT: setne %cl 1654; WIN32-NEXT: movl %eax, (%esi) 1655; WIN32-NEXT: movl %edx, 4(%esi) 1656; WIN32-NEXT: movl %ecx, %eax 1657; WIN32-NEXT: addl $4, %esp 1658; WIN32-NEXT: popl %esi 1659; WIN32-NEXT: popl %edi 1660; WIN32-NEXT: popl %ebx 1661; WIN32-NEXT: retl 1662 %v2 = load i64, i64* %ptr2 1663 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) 1664 %val = extractvalue {i64, i1} %t, 0 1665 %obit = extractvalue {i64, i1} %t, 1 1666 store i64 %val, i64* %res 1667 ret i1 %obit 1668} 1669 1670define zeroext i1 @umuloi8_load(i8* %ptr1, i8 %v2, i8* %res) { 1671; SDAG-LABEL: umuloi8_load: 1672; SDAG: # %bb.0: 1673; SDAG-NEXT: movl %esi, %eax 1674; SDAG-NEXT: # kill: def $al killed $al killed $eax 1675; SDAG-NEXT: mulb (%rdi) 1676; SDAG-NEXT: seto %cl 1677; SDAG-NEXT: movb %al, (%rdx) 1678; SDAG-NEXT: movl %ecx, %eax 1679; SDAG-NEXT: retq 1680; 1681; FAST-LABEL: umuloi8_load: 1682; FAST: # %bb.0: 1683; FAST-NEXT: movb (%rdi), %al 1684; FAST-NEXT: mulb %sil 1685; FAST-NEXT: seto %cl 1686; FAST-NEXT: movb %al, (%rdx) 1687; FAST-NEXT: andb $1, %cl 1688; FAST-NEXT: movzbl %cl, %eax 1689; FAST-NEXT: retq 1690; 1691; WIN64-LABEL: umuloi8_load: 1692; WIN64: # %bb.0: 1693; WIN64-NEXT: movl %edx, %eax 1694; WIN64-NEXT: mulb (%rcx) 1695; WIN64-NEXT: seto %cl 1696; WIN64-NEXT: movb %al, (%r8) 1697; WIN64-NEXT: movl %ecx, %eax 1698; WIN64-NEXT: retq 1699; 1700; WIN32-LABEL: umuloi8_load: 1701; WIN32: # %bb.0: 1702; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1703; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1704; WIN32-NEXT: movb (%eax), %al 1705; WIN32-NEXT: mulb {{[0-9]+}}(%esp) 1706; WIN32-NEXT: seto %cl 1707; WIN32-NEXT: movb %al, (%edx) 1708; WIN32-NEXT: movl %ecx, %eax 1709; WIN32-NEXT: retl 1710 %v1 = load i8, i8* %ptr1 1711 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2) 1712 %val = extractvalue {i8, i1} %t, 0 1713 %obit = extractvalue {i8, i1} %t, 1 1714 store i8 %val, i8* %res 1715 ret i1 %obit 1716} 1717 1718define zeroext i1 @umuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) { 1719; SDAG-LABEL: umuloi8_load2: 1720; SDAG: # %bb.0: 1721; SDAG-NEXT: movl %edi, %eax 1722; SDAG-NEXT: # kill: def $al killed $al killed $eax 1723; SDAG-NEXT: mulb (%rsi) 1724; SDAG-NEXT: seto %cl 1725; SDAG-NEXT: movb %al, (%rdx) 1726; SDAG-NEXT: movl %ecx, %eax 1727; SDAG-NEXT: retq 1728; 1729; FAST-LABEL: umuloi8_load2: 1730; FAST: # %bb.0: 1731; FAST-NEXT: movl %edi, %eax 1732; FAST-NEXT: # kill: def $al killed $al killed $eax 1733; FAST-NEXT: mulb (%rsi) 1734; FAST-NEXT: seto %cl 1735; FAST-NEXT: movb %al, (%rdx) 1736; FAST-NEXT: andb $1, %cl 1737; FAST-NEXT: movzbl %cl, %eax 1738; FAST-NEXT: retq 1739; 1740; WIN64-LABEL: umuloi8_load2: 1741; WIN64: # %bb.0: 1742; WIN64-NEXT: movl %ecx, %eax 1743; WIN64-NEXT: mulb (%rdx) 1744; WIN64-NEXT: seto %cl 1745; WIN64-NEXT: movb %al, (%r8) 1746; WIN64-NEXT: movl %ecx, %eax 1747; WIN64-NEXT: retq 1748; 1749; WIN32-LABEL: umuloi8_load2: 1750; WIN32: # %bb.0: 1751; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1752; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al 1753; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1754; WIN32-NEXT: mulb (%ecx) 1755; WIN32-NEXT: seto %cl 1756; WIN32-NEXT: movb %al, (%edx) 1757; WIN32-NEXT: movl %ecx, %eax 1758; WIN32-NEXT: retl 1759 %v2 = load i8, i8* %ptr2 1760 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2) 1761 %val = extractvalue {i8, i1} %t, 0 1762 %obit = extractvalue {i8, i1} %t, 1 1763 store i8 %val, i8* %res 1764 ret i1 %obit 1765} 1766 1767define zeroext i1 @umuloi16_load(i16* %ptr1, i16 %v2, i16* %res) { 1768; SDAG-LABEL: umuloi16_load: 1769; SDAG: # %bb.0: 1770; SDAG-NEXT: movq %rdx, %rcx 1771; SDAG-NEXT: movl %esi, %eax 1772; SDAG-NEXT: # kill: def $ax killed $ax killed $eax 1773; SDAG-NEXT: mulw (%rdi) 1774; SDAG-NEXT: seto %dl 1775; SDAG-NEXT: movw %ax, (%rcx) 1776; SDAG-NEXT: movl %edx, %eax 1777; SDAG-NEXT: retq 1778; 1779; FAST-LABEL: umuloi16_load: 1780; FAST: # %bb.0: 1781; FAST-NEXT: movq %rdx, %rcx 1782; FAST-NEXT: movzwl (%rdi), %eax 1783; FAST-NEXT: mulw %si 1784; FAST-NEXT: seto %dl 1785; FAST-NEXT: movw %ax, (%rcx) 1786; FAST-NEXT: andb $1, %dl 1787; FAST-NEXT: movzbl %dl, %eax 1788; FAST-NEXT: retq 1789; 1790; WIN64-LABEL: umuloi16_load: 1791; WIN64: # %bb.0: 1792; WIN64-NEXT: movl %edx, %eax 1793; WIN64-NEXT: mulw (%rcx) 1794; WIN64-NEXT: seto %cl 1795; WIN64-NEXT: movw %ax, (%r8) 1796; WIN64-NEXT: movl %ecx, %eax 1797; WIN64-NEXT: retq 1798; 1799; WIN32-LABEL: umuloi16_load: 1800; WIN32: # %bb.0: 1801; WIN32-NEXT: pushl %esi 1802; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 1803; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1804; WIN32-NEXT: movzwl (%eax), %eax 1805; WIN32-NEXT: mulw {{[0-9]+}}(%esp) 1806; WIN32-NEXT: seto %cl 1807; WIN32-NEXT: movw %ax, (%esi) 1808; WIN32-NEXT: movl %ecx, %eax 1809; WIN32-NEXT: popl %esi 1810; WIN32-NEXT: retl 1811 %v1 = load i16, i16* %ptr1 1812 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2) 1813 %val = extractvalue {i16, i1} %t, 0 1814 %obit = extractvalue {i16, i1} %t, 1 1815 store i16 %val, i16* %res 1816 ret i1 %obit 1817} 1818 1819define zeroext i1 @umuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) { 1820; SDAG-LABEL: umuloi16_load2: 1821; SDAG: # %bb.0: 1822; SDAG-NEXT: movq %rdx, %rcx 1823; SDAG-NEXT: movl %edi, %eax 1824; SDAG-NEXT: # kill: def $ax killed $ax killed $eax 1825; SDAG-NEXT: mulw (%rsi) 1826; SDAG-NEXT: seto %dl 1827; SDAG-NEXT: movw %ax, (%rcx) 1828; SDAG-NEXT: movl %edx, %eax 1829; SDAG-NEXT: retq 1830; 1831; FAST-LABEL: umuloi16_load2: 1832; FAST: # %bb.0: 1833; FAST-NEXT: movq %rdx, %rcx 1834; FAST-NEXT: movl %edi, %eax 1835; FAST-NEXT: # kill: def $ax killed $ax killed $eax 1836; FAST-NEXT: mulw (%rsi) 1837; FAST-NEXT: seto %dl 1838; FAST-NEXT: movw %ax, (%rcx) 1839; FAST-NEXT: andb $1, %dl 1840; FAST-NEXT: movzbl %dl, %eax 1841; FAST-NEXT: retq 1842; 1843; WIN64-LABEL: umuloi16_load2: 1844; WIN64: # %bb.0: 1845; WIN64-NEXT: movl %ecx, %eax 1846; WIN64-NEXT: mulw (%rdx) 1847; WIN64-NEXT: seto %cl 1848; WIN64-NEXT: movw %ax, (%r8) 1849; WIN64-NEXT: movl %ecx, %eax 1850; WIN64-NEXT: retq 1851; 1852; WIN32-LABEL: umuloi16_load2: 1853; WIN32: # %bb.0: 1854; WIN32-NEXT: pushl %esi 1855; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 1856; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1857; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1858; WIN32-NEXT: mulw (%ecx) 1859; WIN32-NEXT: seto %cl 1860; WIN32-NEXT: movw %ax, (%esi) 1861; WIN32-NEXT: movl %ecx, %eax 1862; WIN32-NEXT: popl %esi 1863; WIN32-NEXT: retl 1864 %v2 = load i16, i16* %ptr2 1865 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2) 1866 %val = extractvalue {i16, i1} %t, 0 1867 %obit = extractvalue {i16, i1} %t, 1 1868 store i16 %val, i16* %res 1869 ret i1 %obit 1870} 1871 1872define zeroext i1 @umuloi32_load(i32* %ptr1, i32 %v2, i32* %res) { 1873; SDAG-LABEL: umuloi32_load: 1874; SDAG: # %bb.0: 1875; SDAG-NEXT: movq %rdx, %rcx 1876; SDAG-NEXT: movl %esi, %eax 1877; SDAG-NEXT: mull (%rdi) 1878; SDAG-NEXT: seto %dl 1879; SDAG-NEXT: movl %eax, (%rcx) 1880; SDAG-NEXT: movl %edx, %eax 1881; SDAG-NEXT: retq 1882; 1883; FAST-LABEL: umuloi32_load: 1884; FAST: # %bb.0: 1885; FAST-NEXT: movq %rdx, %rcx 1886; FAST-NEXT: movl (%rdi), %eax 1887; FAST-NEXT: mull %esi 1888; FAST-NEXT: seto %dl 1889; FAST-NEXT: movl %eax, (%rcx) 1890; FAST-NEXT: andb $1, %dl 1891; FAST-NEXT: movzbl %dl, %eax 1892; FAST-NEXT: retq 1893; 1894; WIN64-LABEL: umuloi32_load: 1895; WIN64: # %bb.0: 1896; WIN64-NEXT: movl %edx, %eax 1897; WIN64-NEXT: mull (%rcx) 1898; WIN64-NEXT: seto %cl 1899; WIN64-NEXT: movl %eax, (%r8) 1900; WIN64-NEXT: movl %ecx, %eax 1901; WIN64-NEXT: retq 1902; 1903; WIN32-LABEL: umuloi32_load: 1904; WIN32: # %bb.0: 1905; WIN32-NEXT: pushl %esi 1906; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 1907; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1908; WIN32-NEXT: movl (%eax), %eax 1909; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1910; WIN32-NEXT: seto %cl 1911; WIN32-NEXT: movl %eax, (%esi) 1912; WIN32-NEXT: movl %ecx, %eax 1913; WIN32-NEXT: popl %esi 1914; WIN32-NEXT: retl 1915 %v1 = load i32, i32* %ptr1 1916 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) 1917 %val = extractvalue {i32, i1} %t, 0 1918 %obit = extractvalue {i32, i1} %t, 1 1919 store i32 %val, i32* %res 1920 ret i1 %obit 1921} 1922 1923define zeroext i1 @umuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) { 1924; SDAG-LABEL: umuloi32_load2: 1925; SDAG: # %bb.0: 1926; SDAG-NEXT: movq %rdx, %rcx 1927; SDAG-NEXT: movl %edi, %eax 1928; SDAG-NEXT: mull (%rsi) 1929; SDAG-NEXT: seto %dl 1930; SDAG-NEXT: movl %eax, (%rcx) 1931; SDAG-NEXT: movl %edx, %eax 1932; SDAG-NEXT: retq 1933; 1934; FAST-LABEL: umuloi32_load2: 1935; FAST: # %bb.0: 1936; FAST-NEXT: movq %rdx, %rcx 1937; FAST-NEXT: movl %edi, %eax 1938; FAST-NEXT: mull (%rsi) 1939; FAST-NEXT: seto %dl 1940; FAST-NEXT: movl %eax, (%rcx) 1941; FAST-NEXT: andb $1, %dl 1942; FAST-NEXT: movzbl %dl, %eax 1943; FAST-NEXT: retq 1944; 1945; WIN64-LABEL: umuloi32_load2: 1946; WIN64: # %bb.0: 1947; WIN64-NEXT: movl %ecx, %eax 1948; WIN64-NEXT: mull (%rdx) 1949; WIN64-NEXT: seto %cl 1950; WIN64-NEXT: movl %eax, (%r8) 1951; WIN64-NEXT: movl %ecx, %eax 1952; WIN64-NEXT: retq 1953; 1954; WIN32-LABEL: umuloi32_load2: 1955; WIN32: # %bb.0: 1956; WIN32-NEXT: pushl %esi 1957; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 1958; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1959; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1960; WIN32-NEXT: mull (%ecx) 1961; WIN32-NEXT: seto %cl 1962; WIN32-NEXT: movl %eax, (%esi) 1963; WIN32-NEXT: movl %ecx, %eax 1964; WIN32-NEXT: popl %esi 1965; WIN32-NEXT: retl 1966 %v2 = load i32, i32* %ptr2 1967 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) 1968 %val = extractvalue {i32, i1} %t, 0 1969 %obit = extractvalue {i32, i1} %t, 1 1970 store i32 %val, i32* %res 1971 ret i1 %obit 1972} 1973 1974define zeroext i1 @umuloi64_load(i64* %ptr1, i64 %v2, i64* %res) { 1975; SDAG-LABEL: umuloi64_load: 1976; SDAG: # %bb.0: 1977; SDAG-NEXT: movq %rdx, %rcx 1978; SDAG-NEXT: movq %rsi, %rax 1979; SDAG-NEXT: mulq (%rdi) 1980; SDAG-NEXT: seto %dl 1981; SDAG-NEXT: movq %rax, (%rcx) 1982; SDAG-NEXT: movl %edx, %eax 1983; SDAG-NEXT: retq 1984; 1985; FAST-LABEL: umuloi64_load: 1986; FAST: # %bb.0: 1987; FAST-NEXT: movq %rdx, %rcx 1988; FAST-NEXT: movq (%rdi), %rax 1989; FAST-NEXT: mulq %rsi 1990; FAST-NEXT: seto %dl 1991; FAST-NEXT: movq %rax, (%rcx) 1992; FAST-NEXT: andb $1, %dl 1993; FAST-NEXT: movzbl %dl, %eax 1994; FAST-NEXT: retq 1995; 1996; WIN64-LABEL: umuloi64_load: 1997; WIN64: # %bb.0: 1998; WIN64-NEXT: movq %rdx, %rax 1999; WIN64-NEXT: mulq (%rcx) 2000; WIN64-NEXT: seto %cl 2001; WIN64-NEXT: movq %rax, (%r8) 2002; WIN64-NEXT: movl %ecx, %eax 2003; WIN64-NEXT: retq 2004; 2005; WIN32-LABEL: umuloi64_load: 2006; WIN32: # %bb.0: 2007; WIN32-NEXT: pushl %ebp 2008; WIN32-NEXT: pushl %ebx 2009; WIN32-NEXT: pushl %edi 2010; WIN32-NEXT: pushl %esi 2011; WIN32-NEXT: pushl %eax 2012; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp 2013; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2014; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 2015; WIN32-NEXT: movl (%eax), %esi 2016; WIN32-NEXT: movl 4(%eax), %eax 2017; WIN32-NEXT: testl %ecx, %ecx 2018; WIN32-NEXT: setne %dl 2019; WIN32-NEXT: testl %eax, %eax 2020; WIN32-NEXT: setne %bl 2021; WIN32-NEXT: andb %dl, %bl 2022; WIN32-NEXT: mull %ebp 2023; WIN32-NEXT: movl %eax, %edi 2024; WIN32-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill 2025; WIN32-NEXT: movl %ecx, %eax 2026; WIN32-NEXT: mull %esi 2027; WIN32-NEXT: movl %eax, %ecx 2028; WIN32-NEXT: seto %bh 2029; WIN32-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload 2030; WIN32-NEXT: addl %edi, %ecx 2031; WIN32-NEXT: movl %esi, %eax 2032; WIN32-NEXT: mull %ebp 2033; WIN32-NEXT: addl %ecx, %edx 2034; WIN32-NEXT: setb %cl 2035; WIN32-NEXT: orb %bh, %cl 2036; WIN32-NEXT: orb %bl, %cl 2037; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 2038; WIN32-NEXT: movl %eax, (%esi) 2039; WIN32-NEXT: movl %edx, 4(%esi) 2040; WIN32-NEXT: movl %ecx, %eax 2041; WIN32-NEXT: addl $4, %esp 2042; WIN32-NEXT: popl %esi 2043; WIN32-NEXT: popl %edi 2044; WIN32-NEXT: popl %ebx 2045; WIN32-NEXT: popl %ebp 2046; WIN32-NEXT: retl 2047 %v1 = load i64, i64* %ptr1 2048 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) 2049 %val = extractvalue {i64, i1} %t, 0 2050 %obit = extractvalue {i64, i1} %t, 1 2051 store i64 %val, i64* %res 2052 ret i1 %obit 2053} 2054 2055define zeroext i1 @umuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) { 2056; SDAG-LABEL: umuloi64_load2: 2057; SDAG: # %bb.0: 2058; SDAG-NEXT: movq %rdx, %rcx 2059; SDAG-NEXT: movq %rdi, %rax 2060; SDAG-NEXT: mulq (%rsi) 2061; SDAG-NEXT: seto %dl 2062; SDAG-NEXT: movq %rax, (%rcx) 2063; SDAG-NEXT: movl %edx, %eax 2064; SDAG-NEXT: retq 2065; 2066; FAST-LABEL: umuloi64_load2: 2067; FAST: # %bb.0: 2068; FAST-NEXT: movq %rdx, %rcx 2069; FAST-NEXT: movq %rdi, %rax 2070; FAST-NEXT: mulq (%rsi) 2071; FAST-NEXT: seto %dl 2072; FAST-NEXT: movq %rax, (%rcx) 2073; FAST-NEXT: andb $1, %dl 2074; FAST-NEXT: movzbl %dl, %eax 2075; FAST-NEXT: retq 2076; 2077; WIN64-LABEL: umuloi64_load2: 2078; WIN64: # %bb.0: 2079; WIN64-NEXT: movq %rcx, %rax 2080; WIN64-NEXT: mulq (%rdx) 2081; WIN64-NEXT: seto %cl 2082; WIN64-NEXT: movq %rax, (%r8) 2083; WIN64-NEXT: movl %ecx, %eax 2084; WIN64-NEXT: retq 2085; 2086; WIN32-LABEL: umuloi64_load2: 2087; WIN32: # %bb.0: 2088; WIN32-NEXT: pushl %ebp 2089; WIN32-NEXT: pushl %ebx 2090; WIN32-NEXT: pushl %edi 2091; WIN32-NEXT: pushl %esi 2092; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 2093; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 2094; WIN32-NEXT: movl (%edx), %ebp 2095; WIN32-NEXT: movl 4(%edx), %esi 2096; WIN32-NEXT: testl %eax, %eax 2097; WIN32-NEXT: setne %dl 2098; WIN32-NEXT: testl %esi, %esi 2099; WIN32-NEXT: setne %bl 2100; WIN32-NEXT: andb %dl, %bl 2101; WIN32-NEXT: mull %ebp 2102; WIN32-NEXT: movl %eax, %edi 2103; WIN32-NEXT: seto %cl 2104; WIN32-NEXT: movl %esi, %eax 2105; WIN32-NEXT: mull {{[0-9]+}}(%esp) 2106; WIN32-NEXT: movl %eax, %esi 2107; WIN32-NEXT: seto %ch 2108; WIN32-NEXT: orb %cl, %ch 2109; WIN32-NEXT: addl %edi, %esi 2110; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 2111; WIN32-NEXT: mull %ebp 2112; WIN32-NEXT: addl %esi, %edx 2113; WIN32-NEXT: setb %cl 2114; WIN32-NEXT: orb %ch, %cl 2115; WIN32-NEXT: orb %bl, %cl 2116; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 2117; WIN32-NEXT: movl %eax, (%esi) 2118; WIN32-NEXT: movl %edx, 4(%esi) 2119; WIN32-NEXT: movl %ecx, %eax 2120; WIN32-NEXT: popl %esi 2121; WIN32-NEXT: popl %edi 2122; WIN32-NEXT: popl %ebx 2123; WIN32-NEXT: popl %ebp 2124; WIN32-NEXT: retl 2125 %v2 = load i64, i64* %ptr2 2126 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) 2127 %val = extractvalue {i64, i1} %t, 0 2128 %obit = extractvalue {i64, i1} %t, 1 2129 store i64 %val, i64* %res 2130 ret i1 %obit 2131} 2132 2133declare {i8, i1} @llvm.smul.with.overflow.i8 (i8, i8 ) nounwind readnone 2134declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone 2135declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone 2136declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone 2137declare {i8, i1} @llvm.umul.with.overflow.i8 (i8, i8 ) nounwind readnone 2138declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone 2139declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone 2140declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone 2141 2142!0 = !{!"branch_weights", i32 0, i32 2147483647} 2143