1 /*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10 #ifndef __IMMINTRIN_H
11 #error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
12 #endif
13
14 #ifndef __FMAINTRIN_H
15 #define __FMAINTRIN_H
16
/* Attribute bundles used by the function definitions in this file. Both
 * request __always_inline__, __nodebug__ and the "fma" target; they differ
 * only in the __min_vector_width__ value (128 vs. 256). */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("fma"),            \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("fma"),            \
                 __min_vector_width__(256)))
20
21 static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmadd_ps(__m128 __A,__m128 __B,__m128 __C)22 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
23 {
24 return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
25 }
26
27 static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmadd_pd(__m128d __A,__m128d __B,__m128d __C)28 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
29 {
30 return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
31 }
32
33 static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmadd_ss(__m128 __A,__m128 __B,__m128 __C)34 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
35 {
36 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
37 }
38
39 static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmadd_sd(__m128d __A,__m128d __B,__m128d __C)40 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
41 {
42 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
43 }
44
45 static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmsub_ps(__m128 __A,__m128 __B,__m128 __C)46 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
47 {
48 return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
49 }
50
51 static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmsub_pd(__m128d __A,__m128d __B,__m128d __C)52 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
53 {
54 return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
55 }
56
57 static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmsub_ss(__m128 __A,__m128 __B,__m128 __C)58 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
59 {
60 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
61 }
62
63 static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmsub_sd(__m128d __A,__m128d __B,__m128d __C)64 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
65 {
66 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
67 }
68
69 static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmadd_ps(__m128 __A,__m128 __B,__m128 __C)70 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
71 {
72 return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
73 }
74
75 static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmadd_pd(__m128d __A,__m128d __B,__m128d __C)76 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
77 {
78 return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
79 }
80
81 static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmadd_ss(__m128 __A,__m128 __B,__m128 __C)82 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
83 {
84 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
85 }
86
87 static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmadd_sd(__m128d __A,__m128d __B,__m128d __C)88 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
89 {
90 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
91 }
92
93 static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmsub_ps(__m128 __A,__m128 __B,__m128 __C)94 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
95 {
96 return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
97 }
98
99 static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmsub_pd(__m128d __A,__m128d __B,__m128d __C)100 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
101 {
102 return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
103 }
104
105 static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmsub_ss(__m128 __A,__m128 __B,__m128 __C)106 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
107 {
108 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
109 }
110
111 static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmsub_sd(__m128d __A,__m128d __B,__m128d __C)112 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
113 {
114 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
115 }
116
117 static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmaddsub_ps(__m128 __A,__m128 __B,__m128 __C)118 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
119 {
120 return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
121 }
122
123 static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmaddsub_pd(__m128d __A,__m128d __B,__m128d __C)124 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
125 {
126 return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
127 }
128
129 static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmsubadd_ps(__m128 __A,__m128 __B,__m128 __C)130 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
131 {
132 return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
133 }
134
135 static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmsubadd_pd(__m128d __A,__m128d __B,__m128d __C)136 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
137 {
138 return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
139 }
140
141 static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmadd_ps(__m256 __A,__m256 __B,__m256 __C)142 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
143 {
144 return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
145 }
146
147 static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmadd_pd(__m256d __A,__m256d __B,__m256d __C)148 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
149 {
150 return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
151 }
152
153 static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmsub_ps(__m256 __A,__m256 __B,__m256 __C)154 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
155 {
156 return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
157 }
158
159 static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmsub_pd(__m256d __A,__m256d __B,__m256d __C)160 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
161 {
162 return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
163 }
164
165 static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fnmadd_ps(__m256 __A,__m256 __B,__m256 __C)166 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
167 {
168 return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
169 }
170
171 static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fnmadd_pd(__m256d __A,__m256d __B,__m256d __C)172 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
173 {
174 return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
175 }
176
177 static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fnmsub_ps(__m256 __A,__m256 __B,__m256 __C)178 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
179 {
180 return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
181 }
182
183 static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fnmsub_pd(__m256d __A,__m256d __B,__m256d __C)184 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
185 {
186 return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
187 }
188
189 static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmaddsub_ps(__m256 __A,__m256 __B,__m256 __C)190 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
191 {
192 return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
193 }
194
195 static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmaddsub_pd(__m256d __A,__m256d __B,__m256d __C)196 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
197 {
198 return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
199 }
200
201 static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmsubadd_ps(__m256 __A,__m256 __B,__m256 __C)202 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
203 {
204 return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
205 }
206
207 static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmsubadd_pd(__m256d __A,__m256d __B,__m256d __C)208 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
209 {
210 return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
211 }
212
/* Drop the file-local attribute helper macros so they do not remain defined
 * after this header. */
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
215
216 #endif /* __FMAINTRIN_H */
217