1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c | FileCheck %s --check-prefix=F16C
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O0 | FileCheck %s --check-prefix=F16C-O0
6
; ir_fadd_v1f16: fadd of two <1 x half> values.
;
; What the assertions below pin down, grouped by check prefix:
;  - X86 and X64 prefixes (the +sse2 RUN lines): each operand is widened by a
;    call to __gnu_h2f_ieee, the two floats are summed with addss, and the sum
;    is narrowed by a call to __gnu_f2h_ieee. The first widened value has to
;    survive the second call, so it is kept in a stack slot in between.
;  - F16C and F16C-O0 prefixes (the +f16c RUN lines): no libcalls are expected;
;    operands are widened with vcvtph2ps, summed with vaddss, narrowed with
;    vcvtps2ph $4, and the result is moved into %eax with vmovd.
;
; The assertions are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
7define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind {
8; X86-LABEL: ir_fadd_v1f16:
9; X86:       # %bb.0:
10; X86-NEXT:    pushl %esi
11; X86-NEXT:    subl $12, %esp
12; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %esi
13; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
14; X86-NEXT:    movl %eax, (%esp)
15; X86-NEXT:    calll __gnu_h2f_ieee
16; X86-NEXT:    movl %esi, (%esp)
17; X86-NEXT:    fstps {{[0-9]+}}(%esp)
18; X86-NEXT:    calll __gnu_h2f_ieee
19; X86-NEXT:    fstps {{[0-9]+}}(%esp)
20; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
21; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
22; X86-NEXT:    movss %xmm0, (%esp)
23; X86-NEXT:    calll __gnu_f2h_ieee
24; X86-NEXT:    addl $12, %esp
25; X86-NEXT:    popl %esi
26; X86-NEXT:    retl
27;
28; X64-LABEL: ir_fadd_v1f16:
29; X64:       # %bb.0:
30; X64-NEXT:    pushq %rbx
31; X64-NEXT:    subq $16, %rsp
32; X64-NEXT:    movl %edi, %ebx
33; X64-NEXT:    movzwl %si, %edi
34; X64-NEXT:    callq __gnu_h2f_ieee@PLT
35; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
36; X64-NEXT:    movzwl %bx, %edi
37; X64-NEXT:    callq __gnu_h2f_ieee@PLT
38; X64-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
39; X64-NEXT:    callq __gnu_f2h_ieee@PLT
40; X64-NEXT:    addq $16, %rsp
41; X64-NEXT:    popq %rbx
42; X64-NEXT:    retq
43;
44; F16C-LABEL: ir_fadd_v1f16:
45; F16C:       # %bb.0:
46; F16C-NEXT:    movzwl %si, %eax
47; F16C-NEXT:    vmovd %eax, %xmm0
48; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
49; F16C-NEXT:    movzwl %di, %eax
50; F16C-NEXT:    vmovd %eax, %xmm1
51; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
52; F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
53; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
54; F16C-NEXT:    vmovd %xmm0, %eax
55; F16C-NEXT:    # kill: def $ax killed $ax killed $eax
56; F16C-NEXT:    retq
57;
58; F16C-O0-LABEL: ir_fadd_v1f16:
59; F16C-O0:       # %bb.0:
60; F16C-O0-NEXT:    movw %si, %cx
61; F16C-O0-NEXT:    movw %di, %ax
62; F16C-O0-NEXT:    movzwl %cx, %ecx
63; F16C-O0-NEXT:    vmovd %ecx, %xmm0
64; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm1
65; F16C-O0-NEXT:    movzwl %ax, %eax
66; F16C-O0-NEXT:    vmovd %eax, %xmm0
67; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm0
68; F16C-O0-NEXT:    vaddss %xmm1, %xmm0, %xmm0
69; F16C-O0-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
70; F16C-O0-NEXT:    vmovd %xmm0, %eax
71; F16C-O0-NEXT:    # kill: def $ax killed $ax killed $eax
72; F16C-O0-NEXT:    retq
; IR under test: one vector fadd on the two arguments, returned directly.
73  %retval = fadd <1 x half> %arg0, %arg1
74  ret <1 x half> %retval
75}
76
; ir_fadd_v2f16: fadd of two <2 x half> values.
;
; In every configuration the assertions below expect the add to be done one
; lane at a time with a scalar float add, not as a packed operation:
;  - X86 and X64 prefixes (the +sse2 RUN lines): four calls to __gnu_h2f_ieee
;    (one per input lane), two addss, and two calls to __gnu_f2h_ieee (one per
;    result lane). Each half result is written to the stack with movw.
;  - F16C and F16C-O0 prefixes (the +f16c RUN lines): no libcalls; each lane
;    uses vcvtph2ps / vaddss / vcvtps2ph $4, and each half result is written to
;    the stack with vpextrw $0.
; In all four, the two stored halves are then reloaded as one vector with
; (v)movdqa, and lane 0 is extracted into %eax and lane 1 (pextrw $1) ends up
; in %dx before the return.
;
; The assertions are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
77define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
78; X86-LABEL: ir_fadd_v2f16:
79; X86:       # %bb.0:
80; X86-NEXT:    pushl %ebp
81; X86-NEXT:    movl %esp, %ebp
82; X86-NEXT:    pushl %ebx
83; X86-NEXT:    pushl %edi
84; X86-NEXT:    pushl %esi
85; X86-NEXT:    andl $-16, %esp
86; X86-NEXT:    subl $64, %esp
87; X86-NEXT:    movzwl 8(%ebp), %esi
88; X86-NEXT:    movzwl 12(%ebp), %edi
89; X86-NEXT:    movzwl 20(%ebp), %ebx
90; X86-NEXT:    movzwl 16(%ebp), %eax
91; X86-NEXT:    movl %eax, (%esp)
92; X86-NEXT:    calll __gnu_h2f_ieee
93; X86-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
94; X86-NEXT:    movl %ebx, (%esp)
95; X86-NEXT:    calll __gnu_h2f_ieee
96; X86-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
97; X86-NEXT:    movl %edi, (%esp)
98; X86-NEXT:    calll __gnu_h2f_ieee
99; X86-NEXT:    movl %esi, (%esp)
100; X86-NEXT:    fstps {{[0-9]+}}(%esp)
101; X86-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
102; X86-NEXT:    fstps {{[0-9]+}}(%esp)
103; X86-NEXT:    calll __gnu_h2f_ieee
104; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
105; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
106; X86-NEXT:    movss %xmm0, (%esp)
107; X86-NEXT:    fstps {{[0-9]+}}(%esp)
108; X86-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
109; X86-NEXT:    fstps {{[0-9]+}}(%esp)
110; X86-NEXT:    calll __gnu_f2h_ieee
111; X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
112; X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
113; X86-NEXT:    movss %xmm0, (%esp)
114; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
115; X86-NEXT:    calll __gnu_f2h_ieee
116; X86-NEXT:    movw %ax, {{[0-9]+}}(%esp)
117; X86-NEXT:    movdqa {{[0-9]+}}(%esp), %xmm0
118; X86-NEXT:    movd %xmm0, %eax
119; X86-NEXT:    pextrw $1, %xmm0, %edx
120; X86-NEXT:    # kill: def $ax killed $ax killed $eax
121; X86-NEXT:    # kill: def $dx killed $dx killed $edx
122; X86-NEXT:    leal -12(%ebp), %esp
123; X86-NEXT:    popl %esi
124; X86-NEXT:    popl %edi
125; X86-NEXT:    popl %ebx
126; X86-NEXT:    popl %ebp
127; X86-NEXT:    retl
128;
129; X64-LABEL: ir_fadd_v2f16:
130; X64:       # %bb.0:
131; X64-NEXT:    pushq %rbp
132; X64-NEXT:    pushq %r14
133; X64-NEXT:    pushq %rbx
134; X64-NEXT:    subq $32, %rsp
135; X64-NEXT:    movl %edx, %ebx
136; X64-NEXT:    movl %esi, %ebp
137; X64-NEXT:    movl %edi, %r14d
138; X64-NEXT:    movzwl %cx, %edi
139; X64-NEXT:    callq __gnu_h2f_ieee@PLT
140; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
141; X64-NEXT:    movzwl %bp, %edi
142; X64-NEXT:    callq __gnu_h2f_ieee@PLT
143; X64-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
144; X64-NEXT:    callq __gnu_f2h_ieee@PLT
145; X64-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
146; X64-NEXT:    movzwl %bx, %edi
147; X64-NEXT:    callq __gnu_h2f_ieee@PLT
148; X64-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
149; X64-NEXT:    movzwl %r14w, %edi
150; X64-NEXT:    callq __gnu_h2f_ieee@PLT
151; X64-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
152; X64-NEXT:    callq __gnu_f2h_ieee@PLT
153; X64-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
154; X64-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm0
155; X64-NEXT:    movd %xmm0, %eax
156; X64-NEXT:    pextrw $1, %xmm0, %edx
157; X64-NEXT:    # kill: def $ax killed $ax killed $eax
158; X64-NEXT:    # kill: def $dx killed $dx killed $edx
159; X64-NEXT:    addq $32, %rsp
160; X64-NEXT:    popq %rbx
161; X64-NEXT:    popq %r14
162; X64-NEXT:    popq %rbp
163; X64-NEXT:    retq
164;
165; F16C-LABEL: ir_fadd_v2f16:
166; F16C:       # %bb.0:
167; F16C-NEXT:    movzwl %cx, %eax
168; F16C-NEXT:    vmovd %eax, %xmm0
169; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
170; F16C-NEXT:    movzwl %si, %eax
171; F16C-NEXT:    vmovd %eax, %xmm1
172; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
173; F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
174; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
175; F16C-NEXT:    vpextrw $0, %xmm0, -{{[0-9]+}}(%rsp)
176; F16C-NEXT:    movzwl %dx, %eax
177; F16C-NEXT:    vmovd %eax, %xmm0
178; F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
179; F16C-NEXT:    movzwl %di, %eax
180; F16C-NEXT:    vmovd %eax, %xmm1
181; F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
182; F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
183; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
184; F16C-NEXT:    vpextrw $0, %xmm0, -{{[0-9]+}}(%rsp)
185; F16C-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
186; F16C-NEXT:    vmovd %xmm0, %eax
187; F16C-NEXT:    vpextrw $1, %xmm0, %edx
188; F16C-NEXT:    # kill: def $ax killed $ax killed $eax
189; F16C-NEXT:    # kill: def $dx killed $dx killed $edx
190; F16C-NEXT:    retq
191;
192; F16C-O0-LABEL: ir_fadd_v2f16:
193; F16C-O0:       # %bb.0:
194; F16C-O0-NEXT:    movl %esi, %eax
195; F16C-O0-NEXT:    # kill: def $cx killed $cx killed $ecx
196; F16C-O0-NEXT:    movw %dx, %si
197; F16C-O0-NEXT:    # kill: def $ax killed $ax killed $eax
198; F16C-O0-NEXT:    movw %di, %dx
199; F16C-O0-NEXT:    movzwl %si, %esi
200; F16C-O0-NEXT:    vmovd %esi, %xmm0
201; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm1
202; F16C-O0-NEXT:    movzwl %dx, %edx
203; F16C-O0-NEXT:    vmovd %edx, %xmm0
204; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm0
205; F16C-O0-NEXT:    vaddss %xmm1, %xmm0, %xmm0
206; F16C-O0-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
207; F16C-O0-NEXT:    vpextrw $0, %xmm0, -{{[0-9]+}}(%rsp)
208; F16C-O0-NEXT:    movzwl %cx, %ecx
209; F16C-O0-NEXT:    vmovd %ecx, %xmm0
210; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm1
211; F16C-O0-NEXT:    movzwl %ax, %eax
212; F16C-O0-NEXT:    vmovd %eax, %xmm0
213; F16C-O0-NEXT:    vcvtph2ps %xmm0, %xmm0
214; F16C-O0-NEXT:    vaddss %xmm1, %xmm0, %xmm0
215; F16C-O0-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
216; F16C-O0-NEXT:    vpextrw $0, %xmm0, -{{[0-9]+}}(%rsp)
217; F16C-O0-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
218; F16C-O0-NEXT:    vmovd %xmm0, %eax
219; F16C-O0-NEXT:    # kill: def $ax killed $ax killed $eax
220; F16C-O0-NEXT:    vpextrw $1, %xmm0, %ecx
221; F16C-O0-NEXT:    movw %cx, %dx
222; F16C-O0-NEXT:    retq
; IR under test: one vector fadd on the two arguments, returned directly.
223  %retval = fadd <2 x half> %arg0, %arg1
224  ret <2 x half> %retval
225}
226