1 /*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #ifndef __IMMINTRIN_H
11 #error "Never use <bmi2intrin.h> directly; include <immintrin.h> instead."
12 #endif
13 
14 #ifndef __BMI2INTRIN_H
15 #define __BMI2INTRIN_H
16 
/* Attribute set shared by every intrinsic defined in this header: each
 * function is marked always-inline, no-debug, and is compiled for the "bmi2"
 * target feature. The macro is private to this file and is #undef'd at the
 * end of the header. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
19 
20 /// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
21 ///    starting at bit number \a __Y.
22 ///
23 /// \code{.operation}
24 /// i := __Y[7:0]
25 /// result := __X
26 /// IF i < 32
27 ///   result[31:i] := 0
28 /// FI
29 /// \endcode
30 ///
31 /// \headerfile <immintrin.h>
32 ///
33 /// This intrinsic corresponds to the \c BZHI instruction.
34 ///
35 /// \param __X
36 ///    The 32-bit source value to copy.
37 /// \param __Y
38 ///    The lower 8 bits specify the bit number of the lowest bit to zero.
39 /// \returns The partially zeroed 32-bit value.
40 static __inline__ unsigned int __DEFAULT_FN_ATTRS
41 _bzhi_u32(unsigned int __X, unsigned int __Y)
42 {
43   return __builtin_ia32_bzhi_si(__X, __Y);
44 }
45 
46 /// Deposit (scatter) low-order bits from the unsigned 32-bit integer \a __X
47 ///    into the 32-bit result, according to the mask in the unsigned 32-bit
48 ///    integer \a __Y. All other bits of the result are zero.
49 ///
50 /// \code{.operation}
51 /// i := 0
52 /// result := 0
53 /// FOR m := 0 TO 31
54 ///   IF __Y[m] == 1
55 ///     result[m] := __X[i]
56 ///     i := i + 1
57 ///   ENDIF
58 /// ENDFOR
59 /// \endcode
60 ///
61 /// \headerfile <immintrin.h>
62 ///
63 /// This intrinsic corresponds to the \c PDEP instruction.
64 ///
65 /// \param __X
66 ///    The 32-bit source value to copy.
67 /// \param __Y
68 ///    The 32-bit mask specifying where to deposit source bits.
69 /// \returns The 32-bit result.
70 static __inline__ unsigned int __DEFAULT_FN_ATTRS
71 _pdep_u32(unsigned int __X, unsigned int __Y)
72 {
73   return __builtin_ia32_pdep_si(__X, __Y);
74 }
75 
76 /// Extract (gather) bits from the unsigned 32-bit integer \a __X into the
77 ///    low-order bits of the 32-bit result, according to the mask in the
78 ///    unsigned 32-bit integer \a __Y. All other bits of the result are zero.
79 ///
80 /// \code{.operation}
81 /// i := 0
82 /// result := 0
83 /// FOR m := 0 TO 31
84 ///   IF __Y[m] == 1
85 ///     result[i] := __X[m]
86 ///     i := i + 1
87 ///   ENDIF
88 /// ENDFOR
89 /// \endcode
90 ///
91 /// \headerfile <immintrin.h>
92 ///
93 /// This intrinsic corresponds to the \c PEXT instruction.
94 ///
95 /// \param __X
96 ///    The 32-bit source value to copy.
97 /// \param __Y
98 ///    The 32-bit mask specifying which source bits to extract.
99 /// \returns The 32-bit result.
100 static __inline__ unsigned int __DEFAULT_FN_ATTRS
101 _pext_u32(unsigned int __X, unsigned int __Y)
102 {
103   return __builtin_ia32_pext_si(__X, __Y);
104 }
105 
106 /// Multiplies the unsigned 32-bit integers \a __X and \a __Y to form a
107 ///    64-bit product. Stores the upper 32 bits of the product in the
108 ///    memory at \a __P and returns the lower 32 bits.
109 ///
110 /// \code{.operation}
111 /// Store32(__P, (__X * __Y)[63:32])
112 /// result := (__X * __Y)[31:0]
113 /// \endcode
114 ///
115 /// \headerfile <immintrin.h>
116 ///
117 /// This intrinsic corresponds to the \c MULX instruction.
118 ///
119 /// \param __X
120 ///    An unsigned 32-bit multiplicand.
121 /// \param __Y
122 ///    An unsigned 32-bit multiplicand.
123 /// \param __P
124 ///    A pointer to memory for storing the upper half of the product.
125 /// \returns The lower half of the product.
126 static __inline__ unsigned int __DEFAULT_FN_ATTRS
127 _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
128 {
129   unsigned long long __res = (unsigned long long) __X * __Y;
130   *__P = (unsigned int)(__res >> 32);
131   return (unsigned int)__res;
132 }
133 
134 #ifdef  __x86_64__
135 
136 /// Copies the unsigned 64-bit integer \a __X and zeroes the upper bits
137 ///    starting at bit number \a __Y.
138 ///
139 /// \code{.operation}
140 /// i := __Y[7:0]
141 /// result := __X
142 /// IF i < 64
143 ///   result[63:i] := 0
144 /// FI
145 /// \endcode
146 ///
147 /// \headerfile <immintrin.h>
148 ///
149 /// This intrinsic corresponds to the \c BZHI instruction.
150 ///
151 /// \param __X
152 ///    The 64-bit source value to copy.
153 /// \param __Y
154 ///    The lower 8 bits specify the bit number of the lowest bit to zero.
155 /// \returns The partially zeroed 64-bit value.
156 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
157 _bzhi_u64(unsigned long long __X, unsigned long long __Y)
158 {
159   return __builtin_ia32_bzhi_di(__X, __Y);
160 }
161 
162 /// Deposit (scatter) low-order bits from the unsigned 64-bit integer \a __X
163 ///    into the 64-bit result, according to the mask in the unsigned 64-bit
164 ///    integer \a __Y. All other bits of the result are zero.
165 ///
166 /// \code{.operation}
167 /// i := 0
168 /// result := 0
169 /// FOR m := 0 TO 63
170 ///   IF __Y[m] == 1
171 ///     result[m] := __X[i]
172 ///     i := i + 1
173 ///   ENDIF
174 /// ENDFOR
175 /// \endcode
176 ///
177 /// \headerfile <immintrin.h>
178 ///
179 /// This intrinsic corresponds to the \c PDEP instruction.
180 ///
181 /// \param __X
182 ///    The 64-bit source value to copy.
183 /// \param __Y
184 ///    The 64-bit mask specifying where to deposit source bits.
185 /// \returns The 64-bit result.
186 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
187 _pdep_u64(unsigned long long __X, unsigned long long __Y)
188 {
189   return __builtin_ia32_pdep_di(__X, __Y);
190 }
191 
192 /// Extract (gather) bits from the unsigned 64-bit integer \a __X into the
193 ///    low-order bits of the 64-bit result, according to the mask in the
194 ///    unsigned 64-bit integer \a __Y. All other bits of the result are zero.
195 ///
196 /// \code{.operation}
197 /// i := 0
198 /// result := 0
199 /// FOR m := 0 TO 63
200 ///   IF __Y[m] == 1
201 ///     result[i] := __X[m]
202 ///     i := i + 1
203 ///   ENDIF
204 /// ENDFOR
205 /// \endcode
206 ///
207 /// \headerfile <immintrin.h>
208 ///
209 /// This intrinsic corresponds to the \c PEXT instruction.
210 ///
211 /// \param __X
212 ///    The 64-bit source value to copy.
213 /// \param __Y
214 ///    The 64-bit mask specifying which source bits to extract.
215 /// \returns The 64-bit result.
216 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
217 _pext_u64(unsigned long long __X, unsigned long long __Y)
218 {
219   return __builtin_ia32_pext_di(__X, __Y);
220 }
221 
222 /// Multiplies the unsigned 64-bit integers \a __X and \a __Y to form a
223 ///    128-bit product. Stores the upper 64 bits of the product to the
224 ///    memory addressed by \a __P and returns the lower 64 bits.
225 ///
226 /// \code{.operation}
227 /// Store64(__P, (__X * __Y)[127:64])
228 /// result := (__X * __Y)[63:0]
229 /// \endcode
230 ///
231 /// \headerfile <immintrin.h>
232 ///
233 /// This intrinsic corresponds to the \c MULX instruction.
234 ///
235 /// \param __X
236 ///    An unsigned 64-bit multiplicand.
237 /// \param __Y
238 ///    An unsigned 64-bit multiplicand.
239 /// \param __P
240 ///    A pointer to memory for storing the upper half of the product.
241 /// \returns The lower half of the product.
242 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
243 _mulx_u64 (unsigned long long __X, unsigned long long __Y,
244 	   unsigned long long *__P)
245 {
246   unsigned __int128 __res = (unsigned __int128) __X * __Y;
247   *__P = (unsigned long long) (__res >> 64);
248   return (unsigned long long) __res;
249 }
250 
251 #endif /* __x86_64__  */
252 
253 #undef __DEFAULT_FN_ATTRS
254 
255 #endif /* __BMI2INTRIN_H */
256