; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefix=X64

; Codegen test for the 3DNow! intrinsics (llvm.x86.3dnow.* and
; llvm.x86.3dnowa.*). The two llc invocations above compile this file for
; i686 and x86-64 with -mattr=+3dnow; each output is verified with its own
; FileCheck prefix.
;
; Every function has the same shape: bring the operands to x86_mmx (bitcast
; from a 64-bit vector, or take x86_mmx directly), call one intrinsic, and
; bitcast the result back to the vector type that is returned. The check
; lines pin the whole instruction sequence, including argument and return
; value marshalling, not just the 3DNow! instruction itself.
;
; The check lines are autogenerated (see the NOTE on the first line); refresh
; them with utils/update_llc_test_checks.py instead of editing them by hand.

; pavgusb: both operands arrive as x86_mmx. The i686 checks expect the
; <8 x i8> result to be stored through a pointer loaded from the stack
; (note the `retl $4`); the x86-64 checks expect it to be moved to %xmm0.
define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
; X86-LABEL: test_pavgusb:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pavgusb %mm1, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: test_pavgusb:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pavgusb %mm1, %mm0
; X64-NEXT:    movq2dq %mm0, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <8 x i8>
  %1 = bitcast x86_mmx %b.coerce to <8 x i8>
  %2 = bitcast <8 x i8> %0 to x86_mmx
  %3 = bitcast <8 x i8> %1 to x86_mmx
  %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <8 x i8>
  ret <8 x i8> %5
}

declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone

; pf2id: one <2 x float> operand, <2 x i32> result. On i686 the checks expect
; the two stack floats to be combined with movd/punpckldq before the
; instruction and the result to come back in %eax/%edx.
define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pf2id:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pf2id %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pf2id:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pf2id %mm0, %mm0
; X64-NEXT:    movq2dq %mm0, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone

; pfacc: two <2 x float> operands, <2 x float> result. On i686 the checks
; expect the result to be spilled and reloaded with two flds; on x86-64 it is
; spilled and reloaded into %xmm0 with movaps.
define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone

; pfadd: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfadd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfadd %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfadd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfadd %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone

; pfcmpeq: two <2 x float> operands; the result is returned as <2 x i32>.
define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpeq:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpeq %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpeq:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpeq %mm0, %mm1
; X64-NEXT:    movq2dq %mm1, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone

; pfcmpge: two <2 x float> operands; the result is returned as <2 x i32>.
define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpge:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpge %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpge:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpge %mm0, %mm1
; X64-NEXT:    movq2dq %mm1, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone

; pfcmpgt: two <2 x float> operands; the result is returned as <2 x i32>.
define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfcmpgt:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfcmpgt %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfcmpgt:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfcmpgt %mm0, %mm1
; X64-NEXT:    movq2dq %mm1, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x i32>
  ret <2 x i32> %3
}

declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone

; pfmax: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmax:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmax %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmax:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmax %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone

; pfmin: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmin:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmin %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmin:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmin %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone

; pfmul: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfmul:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfmul %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfmul:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfmul %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone

; pfrcp: one <2 x float> operand, <2 x float> result.
define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pfrcp:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pfrcp %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcp:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pfrcp %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone

; pfrcpit1: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrcpit1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrcpit1 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcpit1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrcpit1 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone

; pfrcpit2: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrcpit2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrcpit2 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrcpit2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrcpit2 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone

; pfrsqrt: one <2 x float> operand, <2 x float> result.
define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pfrsqrt:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pfrsqrt %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrsqrt:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pfrsqrt %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone

; pfrsqit1: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfrsqit1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfrsqit1 %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfrsqit1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfrsqit1 %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone

; pfsub: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfsub:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfsub %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfsub:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfsub %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone

; pfsubr: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfsubr:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfsubr %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfsubr:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfsubr %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone

; pi2fd: the operand arrives as x86_mmx (round-tripped through <2 x i32> in
; the IR), so the checks expect no argument packing; <2 x float> result.
define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
; X86-LABEL: test_pi2fd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    pi2fd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pi2fd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pi2fd %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone

; pmulhrw: both operands arrive as x86_mmx; the <4 x i16> result is returned
; the same way as in test_pavgusb (store through a stack-passed pointer on
; i686, movq2dq to %xmm0 on x86-64).
define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
; X86-LABEL: test_pmulhrw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pmulhrw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: test_pmulhrw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pmulhrw %mm1, %mm0
; X64-NEXT:    movq2dq %mm0, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <4 x i16>
  %1 = bitcast x86_mmx %b.coerce to <4 x i16>
  %2 = bitcast <4 x i16> %0 to x86_mmx
  %3 = bitcast <4 x i16> %1 to x86_mmx
  %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
  %5 = bitcast x86_mmx %4 to <4 x i16>
  ret <4 x i16> %5
}

declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone

; The remaining tests use intrinsics from the llvm.x86.3dnowa.* namespace.
; NOTE(review): the llc invocations at the top of the file enable only
; +3dnow, yet the checks below expect these intrinsics to select their
; instructions - confirm that is intentional before tightening the feature
; requirements.

; pf2iw: one <2 x float> operand; the result is returned as <2 x i32>.
define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pf2iw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pf2iw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pf2iw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pf2iw %mm0, %mm0
; X64-NEXT:    movq2dq %mm0, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone

; pfnacc: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfnacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfnacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfnacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfnacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone

; pfpnacc: two <2 x float> operands, <2 x float> result.
define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; X86-LABEL: test_pfpnacc:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 20(%ebp), %mm0
; X86-NEXT:    movd 16(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm2
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    pfpnacc %mm1, %mm2
; X86-NEXT:    movq %mm2, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pfpnacc:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    pfpnacc %mm0, %mm1
; X64-NEXT:    movq %mm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = bitcast <2 x float> %b to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone

; pi2fw: the operand arrives as x86_mmx (round-tripped through <2 x i32> in
; the IR), so the checks expect no argument packing; <2 x float> result.
define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
; X86-LABEL: test_pi2fw:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    pi2fw %mm0, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pi2fw:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pi2fw %mm0, %mm0
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast x86_mmx %a.coerce to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
  %3 = bitcast x86_mmx %2 to <2 x float>
  ret <2 x float> %3
}

declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone

; pswapd on a <2 x float> value. The expected shuffle annotation ([1,0])
; records that the two 32-bit halves are exchanged. The intrinsic is declared
; once, after test_pswapdsi, and shared by both pswapd tests.
define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
; X86-LABEL: test_pswapdsf:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    flds {{[0-9]+}}(%esp)
; X86-NEXT:    flds (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pswapdsf:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
; X64-NEXT:    movq %mm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x float> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x float>
  ret <2 x float> %2
}

; pswapd on a <2 x i32> value; same intrinsic as test_pswapdsf, but the
; result is returned as integers (%eax/%edx on i686, movq2dq on x86-64).
define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
; X86-LABEL: test_pswapdsi:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movd 12(%ebp), %mm0
; X86-NEXT:    movd 8(%ebp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    pswapd %mm1, %mm0 # mm0 = mm1[1,0]
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_pswapdsi:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pswapd %mm0, %mm0 # mm0 = mm0[1,0]
; X64-NEXT:    movq2dq %mm0, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i32> %a to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  ret <2 x i32> %2
}

declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone