; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c | FileCheck %s --check-prefix=F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O0 | FileCheck %s --check-prefix=F16C-O0

; Codegen coverage for fadd on one- and two-element half vectors.
;
; What the expectations below pin down:
;   * In the two +sse2 configurations every half operand is widened through a
;     call to __gnu_h2f_ieee, the addition is done with addss, and the result
;     is narrowed through a call to __gnu_f2h_ieee (the x86_64 run reaches the
;     helpers via @PLT, the i686 run calls them directly).
;   * In the two +f16c configurations no helper is called; the conversions are
;     done inline with vcvtph2ps / vcvtps2ph around a vaddss.
;
; The check lines are generated output; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; Single-element case: one widen/add/narrow sequence per configuration.
define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind {
; X86-LABEL: ir_fadd_v1f16:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    movl %esi, (%esp)
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    movss %xmm0, (%esp)
; X86-NEXT:    calll __gnu_f2h_ieee
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: ir_fadd_v1f16:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    subq $16, %rsp
; X64-NEXT:    movl %edi, %ebx
; X64-NEXT:    movzwl %si, %edi
; X64-NEXT:    callq __gnu_h2f_ieee@PLT
; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT:    movzwl %bx, %edi
; X64-NEXT:    callq __gnu_h2f_ieee@PLT
; X64-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; X64-NEXT:    callq __gnu_f2h_ieee@PLT
; X64-NEXT:    addq $16, %rsp
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
;
; F16C-LABEL: ir_fadd_v1f16:
; F16C:       # %bb.0:
; F16C-NEXT:    movzwl %si, %eax
; F16C-NEXT:    vmovd %eax, %xmm0
; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-NEXT:    movzwl %di, %eax
; F16C-NEXT:    vmovd %eax, %xmm1
; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT:    vmovd %xmm0, %eax
; F16C-NEXT:    # kill: def $ax killed $ax killed $eax
; F16C-NEXT:    retq
;
; F16C-O0-LABEL: ir_fadd_v1f16:
; F16C-O0:       # %bb.0:
; F16C-O0-NEXT:    movw %si, %cx
; F16C-O0-NEXT:    movw %di, %ax
; F16C-O0-NEXT:    movzwl %cx, %ecx
; F16C-O0-NEXT:    vmovd %ecx, %xmm0
; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm1
; F16C-O0-NEXT:    movzwl %ax, %eax
; F16C-O0-NEXT:    vmovd %eax, %xmm0
; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-O0-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; F16C-O0-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-O0-NEXT:    vmovd %xmm0, %eax
; F16C-O0-NEXT:    # kill: def $ax killed $ax killed $eax
; F16C-O0-NEXT:    retq
  %sum = fadd <1 x half> %arg0, %arg1
  ret <1 x half> %sum
}

; Two-element case: the expectations show each lane being widened, added and
; narrowed on its own (two scalar adds per configuration), with the two 16-bit
; results stored to a stack slot and read back through a vector load.
define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
; X86-LABEL: ir_fadd_v2f16:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $64, %esp
; X86-NEXT:    movzwl 8(%ebp), %esi
; X86-NEXT:    movzwl 12(%ebp), %edi
; X86-NEXT:    movzwl 20(%ebp), %ebx
; X86-NEXT:    movzwl 16(%ebp), %eax
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; X86-NEXT:    movl %ebx, (%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; X86-NEXT:    movl %edi, (%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    movl %esi, (%esp)
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    calll __gnu_h2f_ieee
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    movss %xmm0, (%esp)
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; X86-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NEXT:    calll __gnu_f2h_ieee
; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    movss %xmm0, (%esp)
; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT:    calll __gnu_f2h_ieee
; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT:    movdqa {{[0-9]+}}(%esp), %xmm0
; X86-NEXT:    movd %xmm0, %eax
; X86-NEXT:    pextrw $1, %xmm0, %edx
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    # kill: def $dx killed $dx killed $edx
; X86-NEXT:    leal -12(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: ir_fadd_v2f16:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbp
; X64-NEXT:    pushq %r14
; X64-NEXT:    pushq %rbx
; X64-NEXT:    subq $32, %rsp
; X64-NEXT:    movl %edx, %ebx
; X64-NEXT:    movl %esi, %ebp
; X64-NEXT:    movl %edi, %r14d
; X64-NEXT:    movzwl %cx, %edi
; X64-NEXT:    callq __gnu_h2f_ieee@PLT
; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT:    movzwl %bp, %edi
; X64-NEXT:    callq __gnu_h2f_ieee@PLT
; X64-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; X64-NEXT:    callq __gnu_f2h_ieee@PLT
; X64-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
; X64-NEXT:    movzwl %bx, %edi
; X64-NEXT:    callq __gnu_h2f_ieee@PLT
; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT:    movzwl %r14w, %edi
; X64-NEXT:    callq __gnu_h2f_ieee@PLT
; X64-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; X64-NEXT:    callq __gnu_f2h_ieee@PLT
; X64-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
; X64-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm0
; X64-NEXT:    movd %xmm0, %eax
; X64-NEXT:    pextrw $1, %xmm0, %edx
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    # kill: def $dx killed $dx killed $edx
; X64-NEXT:    addq $32, %rsp
; X64-NEXT:    popq %rbx
; X64-NEXT:    popq %r14
; X64-NEXT:    popq %rbp
; X64-NEXT:    retq
;
; F16C-LABEL: ir_fadd_v2f16:
; F16C:       # %bb.0:
; F16C-NEXT:    movzwl %cx, %eax
; F16C-NEXT:    vmovd %eax, %xmm0
; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-NEXT:    movzwl %si, %eax
; F16C-NEXT:    vmovd %eax, %xmm1
; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT:    vpextrw $0, %xmm0, -{{[0-9]+}}(%rsp)
; F16C-NEXT:    movzwl %dx, %eax
; F16C-NEXT:    vmovd %eax, %xmm0
; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-NEXT:    movzwl %di, %eax
; F16C-NEXT:    vmovd %eax, %xmm1
; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT:    vpextrw $0, %xmm0, -{{[0-9]+}}(%rsp)
; F16C-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; F16C-NEXT:    vmovd %xmm0, %eax
; F16C-NEXT:    vpextrw $1, %xmm0, %edx
; F16C-NEXT:    # kill: def $ax killed $ax killed $eax
; F16C-NEXT:    # kill: def $dx killed $dx killed $edx
; F16C-NEXT:    retq
;
; F16C-O0-LABEL: ir_fadd_v2f16:
; F16C-O0:       # %bb.0:
; F16C-O0-NEXT:    movl %esi, %eax
; F16C-O0-NEXT:    # kill: def $cx killed $cx killed $ecx
; F16C-O0-NEXT:    movw %dx, %si
; F16C-O0-NEXT:    # kill: def $ax killed $ax killed $eax
; F16C-O0-NEXT:    movw %di, %dx
; F16C-O0-NEXT:    movzwl %si, %esi
; F16C-O0-NEXT:    vmovd %esi, %xmm0
; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm1
; F16C-O0-NEXT:    movzwl %dx, %edx
; F16C-O0-NEXT:    vmovd %edx, %xmm0
; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-O0-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; F16C-O0-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-O0-NEXT:    vpextrw $0, %xmm0, -{{[0-9]+}}(%rsp)
; F16C-O0-NEXT:    movzwl %cx, %ecx
; F16C-O0-NEXT:    vmovd %ecx, %xmm0
; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm1
; F16C-O0-NEXT:    movzwl %ax, %eax
; F16C-O0-NEXT:    vmovd %eax, %xmm0
; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm0
; F16C-O0-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; F16C-O0-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-O0-NEXT:    vpextrw $0, %xmm0, -{{[0-9]+}}(%rsp)
; F16C-O0-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; F16C-O0-NEXT:    vmovd %xmm0, %eax
; F16C-O0-NEXT:    # kill: def $ax killed $ax killed $eax
; F16C-O0-NEXT:    vpextrw $1, %xmm0, %ecx
; F16C-O0-NEXT:    movw %cx, %dx
; F16C-O0-NEXT:    retq
  %sum = fadd <2 x half> %arg0, %arg1
  ret <2 x half> %sum
}