; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512dq,+avx512bw | FileCheck %s --check-prefix=AVX

; Each test loads a scalar, applies a binary op with a constant, and inserts
; the result into element 0 of an undef vector. The expected codegen keeps the
; op in a scalar (GPR or scalar-FP) register and only then moves it into an
; XMM register, rather than widening the load to a vector op.

; Loaded i32 + constant: scalar addl, then movd into the vector.
define <4 x i32> @add_op1_constant(i32* %p) nounwind {
; SSE-LABEL: add_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movl (%rdi), %eax
; SSE-NEXT: addl $42, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: add_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movl (%rdi), %eax
; AVX-NEXT: addl $42, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = add i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; Code and data size may increase by using more vector ops, so the transform is disabled here.
; Same pattern as above but with optsize: still expected to stay scalar.
define <4 x i32> @add_op1_constant_optsize(i32* %p) nounwind optsize {
; SSE-LABEL: add_op1_constant_optsize:
; SSE: # %bb.0:
; SSE-NEXT: movl (%rdi), %eax
; SSE-NEXT: addl $42, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: add_op1_constant_optsize:
; AVX: # %bb.0:
; AVX-NEXT: movl (%rdi), %eax
; AVX-NEXT: addl $42, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = add i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; Constant on the LHS of the add; i16 load is zero-extended for the 32-bit add.
define <8 x i16> @add_op0_constant(i16* %p) nounwind {
; SSE-LABEL: add_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movzwl (%rdi), %eax
; SSE-NEXT: addl $42, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: add_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: movzwl (%rdi), %eax
; AVX-NEXT: addl $42, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i16, i16* %p
  %b = add i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

; Non-commutative sub with constant LHS: 42 in a register, subq from memory.
define <2 x i64> @sub_op0_constant(i64* %p) nounwind {
; SSE-LABEL: sub_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movl $42, %eax
; SSE-NEXT: subq (%rdi), %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sub_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: movl $42, %eax
; AVX-NEXT: subq (%rdi), %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: retq
  %x = load i64, i64* %p
  %b = sub i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

; sub of constant RHS is folded to add of -42 on the i8 value.
define <16 x i8> @sub_op1_constant(i8* %p) nounwind {
; SSE-LABEL: sub_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movb (%rdi), %al
; SSE-NEXT: addb $-42, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sub_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movb (%rdi), %al
; AVX-NEXT: addb $-42, %al
; AVX-NEXT: movzbl %al, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i8, i8* %p
  %b = sub i8 %x, 42
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}

; Multiply folds the load into a single imull-from-memory.
define <4 x i32> @mul_op1_constant(i32* %p) nounwind {
; SSE-LABEL: mul_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: imull $42, (%rdi), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mul_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: imull $42, (%rdi), %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = mul i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; i16 multiply with constant LHS: zero-extend the load, then imull.
define <8 x i16> @mul_op0_constant(i16* %p) nounwind {
; SSE-LABEL: mul_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movzwl (%rdi), %eax
; SSE-NEXT: imull $42, %eax, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mul_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: movzwl (%rdi), %eax
; AVX-NEXT: imull $42, %eax, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i16, i16* %p
  %b = mul i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

; Bitwise and with an immediate stays in a GPR.
define <4 x i32> @and_op1_constant(i32* %p) nounwind {
; SSE-LABEL: and_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movl (%rdi), %eax
; SSE-NEXT: andl $42, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: and_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movl (%rdi), %eax
; AVX-NEXT: andl $42, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = and i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; 64-bit or with an immediate stays in a GPR.
define <2 x i64> @or_op1_constant(i64* %p) nounwind {
; SSE-LABEL: or_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movq (%rdi), %rax
; SSE-NEXT: orq $42, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: or_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movq (%rdi), %rax
; AVX-NEXT: orq $42, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: retq
  %x = load i64, i64* %p
  %b = or i64 %x, 42
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

; i16 xor with an immediate after a zero-extending load.
define <8 x i16> @xor_op1_constant(i16* %p) nounwind {
; SSE-LABEL: xor_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movzwl (%rdi), %eax
; SSE-NEXT: xorl $42, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: xor_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movzwl (%rdi), %eax
; AVX-NEXT: xorl $42, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i16, i16* %p
  %b = xor i16 %x, 42
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

; Variable shift amount: the loaded byte goes to %cl, constant 42 is shifted.
define <4 x i32> @shl_op0_constant(i32* %p) nounwind {
; SSE-LABEL: shl_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movb (%rdi), %cl
; SSE-NEXT: movl $42, %eax
; SSE-NEXT: shll %cl, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shl_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: movb (%rdi), %cl
; AVX-NEXT: movl $42, %eax
; AVX-NEXT: shll %cl, %eax
; AVX-NEXT: retq is not here; kept below
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = shl i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; Constant shift amount: shlb immediate on the loaded byte.
define <16 x i8> @shl_op1_constant(i8* %p) nounwind {
; SSE-LABEL: shl_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movb (%rdi), %al
; SSE-NEXT: shlb $5, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shl_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movb (%rdi), %al
; AVX-NEXT: shlb $5, %al
; AVX-NEXT: movzbl %al, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i8, i8* %p
  %b = shl i8 %x, 5
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}
; Logical right shift of constant 42 by a loaded amount (64-bit shrq).
define <2 x i64> @lshr_op0_constant(i64* %p) nounwind {
; SSE-LABEL: lshr_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movb (%rdi), %cl
; SSE-NEXT: movl $42, %eax
; SSE-NEXT: shrq %cl, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: lshr_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: movb (%rdi), %cl
; AVX-NEXT: movl $42, %eax
; AVX-NEXT: shrq %cl, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: retq
  %x = load i64, i64* %p
  %b = lshr i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

; Logical right shift by constant 17 on the loaded i32.
define <4 x i32> @lshr_op1_constant(i32* %p) nounwind {
; SSE-LABEL: lshr_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movl (%rdi), %eax
; SSE-NEXT: shrl $17, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: lshr_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movl (%rdi), %eax
; AVX-NEXT: shrl $17, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = lshr i32 %x, 17
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; Arithmetic right shift of constant -42 by a loaded amount.
define <8 x i16> @ashr_op0_constant(i16* %p) nounwind {
; SSE-LABEL: ashr_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movb (%rdi), %cl
; SSE-NEXT: movl $-42, %eax
; SSE-NEXT: sarl %cl, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ashr_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: movb (%rdi), %cl
; AVX-NEXT: movl $-42, %eax
; AVX-NEXT: sarl %cl, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i16, i16* %p
  %b = ashr i16 -42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

; Arithmetic right shift by constant 7; load is sign-extended (movswl).
define <8 x i16> @ashr_op1_constant(i16* %p) nounwind {
; SSE-LABEL: ashr_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movswl (%rdi), %eax
; SSE-NEXT: sarl $7, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ashr_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movswl (%rdi), %eax
; AVX-NEXT: sarl $7, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i16, i16* %p
  %b = ashr i16 %x, 7
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

; Signed divide of constant by loaded value uses idivl from memory.
define <4 x i32> @sdiv_op0_constant(i32* %p) nounwind {
; SSE-LABEL: sdiv_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movl $42, %eax
; SSE-NEXT: xorl %edx, %edx
; SSE-NEXT: idivl (%rdi)
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: movl $42, %eax
; AVX-NEXT: xorl %edx, %edx
; AVX-NEXT: idivl (%rdi)
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = sdiv i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; Signed divide by constant 42 is strength-reduced to a multiply/shift sequence.
define <8 x i16> @sdiv_op1_constant(i16* %p) nounwind {
; SSE-LABEL: sdiv_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movswl (%rdi), %eax
; SSE-NEXT: imull $-15603, %eax, %ecx # imm = 0xC30D
; SSE-NEXT: shrl $16, %ecx
; SSE-NEXT: addl %eax, %ecx
; SSE-NEXT: movzwl %cx, %eax
; SSE-NEXT: movswl %ax, %ecx
; SSE-NEXT: shrl $15, %eax
; SSE-NEXT: sarl $5, %ecx
; SSE-NEXT: addl %eax, %ecx
; SSE-NEXT: movd %ecx, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movswl (%rdi), %eax
; AVX-NEXT: imull $-15603, %eax, %ecx # imm = 0xC30D
; AVX-NEXT: shrl $16, %ecx
; AVX-NEXT: addl %eax, %ecx
; AVX-NEXT: movzwl %cx, %eax
; AVX-NEXT: movswl %ax, %ecx
; AVX-NEXT: shrl $15, %eax
; AVX-NEXT: sarl $5, %ecx
; AVX-NEXT: addl %eax, %ecx
; AVX-NEXT: vmovd %ecx, %xmm0
; AVX-NEXT: retq
  %x = load i16, i16* %p
  %b = sdiv i16 %x, 42
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

; Signed remainder of constant by loaded value: idivw, remainder taken from %dx.
define <8 x i16> @srem_op0_constant(i16* %p) nounwind {
; SSE-LABEL: srem_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movw $42, %ax
; SSE-NEXT: xorl %edx, %edx
; SSE-NEXT: idivw (%rdi)
; SSE-NEXT: # kill: def $dx killed $dx def $edx
; SSE-NEXT: movd %edx, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: srem_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: movw $42, %ax
; AVX-NEXT: xorl %edx, %edx
; AVX-NEXT: idivw (%rdi)
; AVX-NEXT: # kill: def $dx killed $dx def $edx
; AVX-NEXT: movd %edx, %xmm0
; AVX-NEXT: retq
  %x = load i16, i16* %p
  %b = srem i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

; Signed remainder by constant 42 via multiply/shift division then subtract.
define <4 x i32> @srem_op1_constant(i32* %p) nounwind {
; SSE-LABEL: srem_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movslq (%rdi), %rax
; SSE-NEXT: imulq $818089009, %rax, %rcx # imm = 0x30C30C31
; SSE-NEXT: movq %rcx, %rdx
; SSE-NEXT: shrq $63, %rdx
; SSE-NEXT: sarq $35, %rcx
; SSE-NEXT: addl %edx, %ecx
; SSE-NEXT: imull $42, %ecx, %ecx
; SSE-NEXT: subl %ecx, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: srem_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movslq (%rdi), %rax
; AVX-NEXT: imulq $818089009, %rax, %rcx # imm = 0x30C30C31
; AVX-NEXT: movq %rcx, %rdx
; AVX-NEXT: shrq $63, %rdx
; AVX-NEXT: sarq $35, %rcx
; AVX-NEXT: addl %edx, %ecx
; AVX-NEXT: imull $42, %ecx, %ecx
; AVX-NEXT: subl %ecx, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = srem i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; Unsigned divide of constant by loaded value uses divl from memory.
define <4 x i32> @udiv_op0_constant(i32* %p) nounwind {
; SSE-LABEL: udiv_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movl $42, %eax
; SSE-NEXT: xorl %edx, %edx
; SSE-NEXT: divl (%rdi)
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: udiv_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: movl $42, %eax
; AVX-NEXT: xorl %edx, %edx
; AVX-NEXT: divl (%rdi)
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = udiv i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; Unsigned divide by constant 42 via magic-number multiply (mulq) and shifts.
define <2 x i64> @udiv_op1_constant(i64* %p) nounwind {
; SSE-LABEL: udiv_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movq (%rdi), %rax
; SSE-NEXT: shrq %rax
; SSE-NEXT: movabsq $-4392081922311798003, %rcx # imm = 0xC30C30C30C30C30D
; SSE-NEXT: mulq %rcx
; SSE-NEXT: shrq $4, %rdx
; SSE-NEXT: movq %rdx, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: udiv_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movq (%rdi), %rax
; AVX-NEXT: shrq %rax
; AVX-NEXT: movabsq $-4392081922311798003, %rcx # imm = 0xC30C30C30C30C30D
; AVX-NEXT: mulq %rcx
; AVX-NEXT: shrq $4, %rdx
; AVX-NEXT: vmovq %rdx, %xmm0
; AVX-NEXT: retq
  %x = load i64, i64* %p
  %b = udiv i64 %x, 42
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

; Unsigned remainder of constant by loaded value: divq, remainder in %rdx.
define <2 x i64> @urem_op0_constant(i64* %p) nounwind {
; SSE-LABEL: urem_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movl $42, %eax
; SSE-NEXT: xorl %edx, %edx
; SSE-NEXT: divq (%rdi)
; SSE-NEXT: movq %rdx, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: urem_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: movl $42, %eax
; AVX-NEXT: xorl %edx, %edx
; AVX-NEXT: divq (%rdi)
; AVX-NEXT: vmovq %rdx, %xmm0
; AVX-NEXT: retq
  %x = load i64, i64* %p
  %b = urem i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

; Unsigned i8 remainder by constant 42 via multiply/shift division then subtract.
define <16 x i8> @urem_op1_constant(i8* %p) nounwind {
; SSE-LABEL: urem_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movb (%rdi), %al
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: shrb %cl
; SSE-NEXT: movzbl %cl, %ecx
; SSE-NEXT: imull $49, %ecx, %ecx
; SSE-NEXT: shrl $10, %ecx
; SSE-NEXT: imull $42, %ecx, %ecx
; SSE-NEXT: subb %cl, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: urem_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: movb (%rdi), %al
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: shrb %cl
; AVX-NEXT: movzbl %cl, %ecx
; AVX-NEXT: imull $49, %ecx, %ecx
; AVX-NEXT: shrl $10, %ecx
; AVX-NEXT: imull $42, %ecx, %ecx
; AVX-NEXT: subb %cl, %al
; AVX-NEXT: movzbl %al, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i8, i8* %p
  %b = urem i8 %x, 42
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}

; FP add with constant-pool operand stays scalar (addss), already in an XMM reg.
define <4 x float> @fadd_op1_constant(float* %p) nounwind {
; SSE-LABEL: fadd_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fadd_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x = load float, float* %p
  %b = fadd float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

; FP sub of a constant is emitted as an add of the negated constant.
define <2 x double> @fsub_op1_constant(double* %p) nounwind {
; SSE-LABEL: fsub_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fsub_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x = load double, double* %p
  %b = fsub double %x, 42.0
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

; FP sub with constant LHS: load the constant, subtract from memory.
define <4 x float> @fsub_op0_constant(float* %p) nounwind {
; SSE-LABEL: fsub_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: subss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fsub_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %x = load float, float* %p
  %b = fsub float 42.0, %x
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

; Scalar FP multiply with a constant-pool operand.
define <4 x float> @fmul_op1_constant(float* %p) nounwind {
; SSE-LABEL: fmul_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fmul_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x = load float, float* %p
  %b = fmul float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

; Scalar FP divide by a constant-pool operand.
define <2 x double> @fdiv_op1_constant(double* %p) nounwind {
; SSE-LABEL: fdiv_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fdiv_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x = load double, double* %p
  %b = fdiv double %x, 42.0
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

; Scalar FP divide with constant numerator; divisor folded from memory.
define <4 x float> @fdiv_op0_constant(float* %p) nounwind {
; SSE-LABEL: fdiv_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: divss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fdiv_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %x = load float, float* %p
  %b = fdiv float 42.0, %x
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

; frem lowers to a libcall (fmodf), so a call frame is set up.
define <4 x float> @frem_op1_constant(float* %p) nounwind {
; SSE-LABEL: frem_op1_constant:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rax
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: callq fmodf@PLT
; SSE-NEXT: popq %rax
; SSE-NEXT: retq
;
; AVX-LABEL: frem_op1_constant:
; AVX: # %bb.0:
; AVX-NEXT: pushq %rax
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: callq fmodf@PLT
; AVX-NEXT: popq %rax
; AVX-NEXT: retq
  %x = load float, float* %p
  %b = frem float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

; Double-precision frem with constant LHS lowers to a call to fmod.
define <2 x double> @frem_op0_constant(double* %p) nounwind {
; SSE-LABEL: frem_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rax
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: callq fmod@PLT
; SSE-NEXT: popq %rax
; SSE-NEXT: retq
;
; AVX-LABEL: frem_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: pushq %rax
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: callq fmod@PLT
; AVX-NEXT: popq %rax
; AVX-NEXT: retq
  %x = load double, double* %p
  %b = frem double 42.0, %x
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

; Try again with 256-bit types.
; 256-bit result: the scalar op and single movd/movq are still expected.
define <8 x i32> @add_op1_constant_v8i32(i32* %p) nounwind {
; SSE-LABEL: add_op1_constant_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: movl (%rdi), %eax
; SSE-NEXT: addl $42, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: add_op1_constant_v8i32:
; AVX: # %bb.0:
; AVX-NEXT: movl (%rdi), %eax
; AVX-NEXT: addl $42, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = add i32 %x, 42
  %r = insertelement <8 x i32> undef, i32 %b, i32 0
  ret <8 x i32> %r
}

; 256-bit sub with constant LHS; scalar subq as in the 128-bit case.
define <4 x i64> @sub_op0_constant_v4i64(i64* %p) nounwind {
; SSE-LABEL: sub_op0_constant_v4i64:
; SSE: # %bb.0:
; SSE-NEXT: movl $42, %eax
; SSE-NEXT: subq (%rdi), %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sub_op0_constant_v4i64:
; AVX: # %bb.0:
; AVX-NEXT: movl $42, %eax
; AVX-NEXT: subq (%rdi), %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: retq
  %x = load i64, i64* %p
  %b = sub i64 42, %x
  %r = insertelement <4 x i64> undef, i64 %b, i32 0
  ret <4 x i64> %r
}

; 256-bit multiply: imull folds the load, then a single movd.
define <8 x i32> @mul_op1_constant_v8i32(i32* %p) nounwind {
; SSE-LABEL: mul_op1_constant_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: imull $42, (%rdi), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mul_op1_constant_v8i32:
; AVX: # %bb.0:
; AVX-NEXT: imull $42, (%rdi), %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = mul i32 %x, 42
  %r = insertelement <8 x i32> undef, i32 %b, i32 0
  ret <8 x i32> %r
}

; 256-bit or with an immediate; scalar orq then movq.
define <4 x i64> @or_op1_constant_v4i64(i64* %p) nounwind {
; SSE-LABEL: or_op1_constant_v4i64:
; SSE: # %bb.0:
; SSE-NEXT: movq (%rdi), %rax
; SSE-NEXT: orq $42, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: or_op1_constant_v4i64:
; AVX: # %bb.0:
; AVX-NEXT: movq (%rdi), %rax
; AVX-NEXT: orq $42, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: retq
  %x = load i64, i64* %p
  %b = or i64 %x, 42
  %r = insertelement <4 x i64> undef, i64 %b, i32 0
  ret <4 x i64> %r
}

; Try again with 512-bit types.

; 512-bit result: same scalar addl + movd expectation.
define <16 x i32> @add_op1_constant_v16i32(i32* %p) nounwind {
; SSE-LABEL: add_op1_constant_v16i32:
; SSE: # %bb.0:
; SSE-NEXT: movl (%rdi), %eax
; SSE-NEXT: addl $42, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: add_op1_constant_v16i32:
; AVX: # %bb.0:
; AVX-NEXT: movl (%rdi), %eax
; AVX-NEXT: addl $42, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = add i32 %x, 42
  %r = insertelement <16 x i32> undef, i32 %b, i32 0
  ret <16 x i32> %r
}

; 512-bit sub with constant LHS; scalar subq then movq.
define <8 x i64> @sub_op0_constant_v8i64(i64* %p) nounwind {
; SSE-LABEL: sub_op0_constant_v8i64:
; SSE: # %bb.0:
; SSE-NEXT: movl $42, %eax
; SSE-NEXT: subq (%rdi), %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sub_op0_constant_v8i64:
; AVX: # %bb.0:
; AVX-NEXT: movl $42, %eax
; AVX-NEXT: subq (%rdi), %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: retq
  %x = load i64, i64* %p
  %b = sub i64 42, %x
  %r = insertelement <8 x i64> undef, i64 %b, i32 0
  ret <8 x i64> %r
}

; 512-bit multiply: imull folds the load, then a single movd.
define <16 x i32> @mul_op1_constant_v16i32(i32* %p) nounwind {
; SSE-LABEL: mul_op1_constant_v16i32:
; SSE: # %bb.0:
; SSE-NEXT: imull $42, (%rdi), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mul_op1_constant_v16i32:
; AVX: # %bb.0:
; AVX-NEXT: imull $42, (%rdi), %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
  %x = load i32, i32* %p
  %b = mul i32 %x, 42
  %r = insertelement <16 x i32> undef, i32 %b, i32 0
  ret <16 x i32> %r
}

; 512-bit or with an immediate; scalar orq then movq.
define <8 x i64> @or_op1_constant_v8i64(i64* %p) nounwind {
; SSE-LABEL: or_op1_constant_v8i64:
; SSE: # %bb.0:
; SSE-NEXT: movq (%rdi), %rax
; SSE-NEXT: orq $42, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: or_op1_constant_v8i64:
; AVX: # %bb.0:
; AVX-NEXT: movq (%rdi), %rax
; AVX-NEXT: orq $42, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: retq
  %x = load i64, i64* %p
  %b = or i64 %x, 42
  %r = insertelement <8 x i64> undef, i64 %b, i32 0
  ret <8 x i64> %r
}