1 /*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #ifndef __IMMINTRIN_H
11 #error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
12 #endif
13 
14 #ifndef __FMAINTRIN_H
15 #define __FMAINTRIN_H
16 
/* Attribute sets applied to every intrinsic defined in this header.  Both
 * request forced inlining, suppress debug info, and enable the "fma" target
 * feature; they differ only in the minimum vector width (128 vs. 256 bits)
 * that is declared for the function.
 */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("fma"),            \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("fma"),            \
                 __min_vector_width__(256)))
20 
21 static __inline__ __m128 __DEFAULT_FN_ATTRS128
22 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
23 {
24   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
25 }
26 
27 static __inline__ __m128d __DEFAULT_FN_ATTRS128
28 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
29 {
30   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
31 }
32 
33 static __inline__ __m128 __DEFAULT_FN_ATTRS128
34 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
35 {
36   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
37 }
38 
39 static __inline__ __m128d __DEFAULT_FN_ATTRS128
40 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
41 {
42   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
43 }
44 
45 static __inline__ __m128 __DEFAULT_FN_ATTRS128
46 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
47 {
48   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
49 }
50 
51 static __inline__ __m128d __DEFAULT_FN_ATTRS128
52 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
53 {
54   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
55 }
56 
57 static __inline__ __m128 __DEFAULT_FN_ATTRS128
58 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
59 {
60   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
61 }
62 
63 static __inline__ __m128d __DEFAULT_FN_ATTRS128
64 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
65 {
66   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
67 }
68 
69 static __inline__ __m128 __DEFAULT_FN_ATTRS128
70 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
71 {
72   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
73 }
74 
75 static __inline__ __m128d __DEFAULT_FN_ATTRS128
76 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
77 {
78   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
79 }
80 
81 static __inline__ __m128 __DEFAULT_FN_ATTRS128
82 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
83 {
84   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
85 }
86 
87 static __inline__ __m128d __DEFAULT_FN_ATTRS128
88 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
89 {
90   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
91 }
92 
93 static __inline__ __m128 __DEFAULT_FN_ATTRS128
94 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
95 {
96   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
97 }
98 
99 static __inline__ __m128d __DEFAULT_FN_ATTRS128
100 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
101 {
102   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
103 }
104 
105 static __inline__ __m128 __DEFAULT_FN_ATTRS128
106 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
107 {
108   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
109 }
110 
111 static __inline__ __m128d __DEFAULT_FN_ATTRS128
112 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
113 {
114   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
115 }
116 
117 static __inline__ __m128 __DEFAULT_FN_ATTRS128
118 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
119 {
120   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
121 }
122 
123 static __inline__ __m128d __DEFAULT_FN_ATTRS128
124 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
125 {
126   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
127 }
128 
129 static __inline__ __m128 __DEFAULT_FN_ATTRS128
130 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
131 {
132   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
133 }
134 
135 static __inline__ __m128d __DEFAULT_FN_ATTRS128
136 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
137 {
138   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
139 }
140 
141 static __inline__ __m256 __DEFAULT_FN_ATTRS256
142 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
143 {
144   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
145 }
146 
147 static __inline__ __m256d __DEFAULT_FN_ATTRS256
148 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
149 {
150   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
151 }
152 
153 static __inline__ __m256 __DEFAULT_FN_ATTRS256
154 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
155 {
156   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
157 }
158 
159 static __inline__ __m256d __DEFAULT_FN_ATTRS256
160 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
161 {
162   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
163 }
164 
165 static __inline__ __m256 __DEFAULT_FN_ATTRS256
166 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
167 {
168   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
169 }
170 
171 static __inline__ __m256d __DEFAULT_FN_ATTRS256
172 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
173 {
174   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
175 }
176 
177 static __inline__ __m256 __DEFAULT_FN_ATTRS256
178 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
179 {
180   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
181 }
182 
183 static __inline__ __m256d __DEFAULT_FN_ATTRS256
184 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
185 {
186   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
187 }
188 
189 static __inline__ __m256 __DEFAULT_FN_ATTRS256
190 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
191 {
192   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
193 }
194 
195 static __inline__ __m256d __DEFAULT_FN_ATTRS256
196 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
197 {
198   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
199 }
200 
201 static __inline__ __m256 __DEFAULT_FN_ATTRS256
202 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
203 {
204   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
205 }
206 
207 static __inline__ __m256d __DEFAULT_FN_ATTRS256
208 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
209 {
210   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
211 }
212 
213 #undef __DEFAULT_FN_ATTRS128
214 #undef __DEFAULT_FN_ATTRS256
215 
216 #endif /* __FMAINTRIN_H */
217