1 /*===---- bmiintrin.h - BMI intrinsics -------------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
11 #error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
12 #endif
13 
14 #ifndef __BMIINTRIN_H
15 #define __BMIINTRIN_H
16 
/* The tzcnt intrinsics are deliberately usable on non-BMI targets, so these
   attributes carry no __target__ requirement. On a non-BMI target the TZCNT
   instruction behaves as BSF, and there is code that relies on this to use
   TZCNT as a potentially faster version of BSF. */
#define __RELAXED_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__))
21 
/* Single-leading-underscore alias for __tzcnt_u16. */
#define _tzcnt_u16(__a) (__tzcnt_u16(__a))
23 
24 /// Counts the number of trailing zero bits in the operand.
25 ///
26 /// \headerfile <x86intrin.h>
27 ///
28 /// This intrinsic corresponds to the <c> TZCNT </c> instruction.
29 ///
30 /// \param __X
31 ///    An unsigned 16-bit integer whose trailing zeros are to be counted.
32 /// \returns An unsigned 16-bit integer containing the number of trailing zero
33 ///    bits in the operand.
34 static __inline__ unsigned short __RELAXED_FN_ATTRS
35 __tzcnt_u16(unsigned short __X)
36 {
37   return __builtin_ia32_tzcnt_u16(__X);
38 }
39 
40 /// Counts the number of trailing zero bits in the operand.
41 ///
42 /// \headerfile <x86intrin.h>
43 ///
44 /// This intrinsic corresponds to the <c> TZCNT </c> instruction.
45 ///
46 /// \param __X
47 ///    An unsigned 32-bit integer whose trailing zeros are to be counted.
48 /// \returns An unsigned 32-bit integer containing the number of trailing zero
49 ///    bits in the operand.
50 /// \see _mm_tzcnt_32
51 static __inline__ unsigned int __RELAXED_FN_ATTRS
52 __tzcnt_u32(unsigned int __X)
53 {
54   return __builtin_ia32_tzcnt_u32(__X);
55 }
56 
57 /// Counts the number of trailing zero bits in the operand.
58 ///
59 /// \headerfile <x86intrin.h>
60 ///
61 /// This intrinsic corresponds to the <c> TZCNT </c> instruction.
62 ///
63 /// \param __X
64 ///    An unsigned 32-bit integer whose trailing zeros are to be counted.
65 /// \returns An 32-bit integer containing the number of trailing zero bits in
66 ///    the operand.
67 /// \see __tzcnt_u32
68 static __inline__ int __RELAXED_FN_ATTRS
69 _mm_tzcnt_32(unsigned int __X)
70 {
71   return (int)__builtin_ia32_tzcnt_u32(__X);
72 }
73 
/* Single-leading-underscore alias for __tzcnt_u32. */
#define _tzcnt_u32(__a) (__tzcnt_u32(__a))
75 
76 #ifdef __x86_64__
77 
78 /// Counts the number of trailing zero bits in the operand.
79 ///
80 /// \headerfile <x86intrin.h>
81 ///
82 /// This intrinsic corresponds to the <c> TZCNT </c> instruction.
83 ///
84 /// \param __X
85 ///    An unsigned 64-bit integer whose trailing zeros are to be counted.
86 /// \returns An unsigned 64-bit integer containing the number of trailing zero
87 ///    bits in the operand.
88 /// \see _mm_tzcnt_64
89 static __inline__ unsigned long long __RELAXED_FN_ATTRS
90 __tzcnt_u64(unsigned long long __X)
91 {
92   return __builtin_ia32_tzcnt_u64(__X);
93 }
94 
95 /// Counts the number of trailing zero bits in the operand.
96 ///
97 /// \headerfile <x86intrin.h>
98 ///
99 /// This intrinsic corresponds to the <c> TZCNT </c> instruction.
100 ///
101 /// \param __X
102 ///    An unsigned 64-bit integer whose trailing zeros are to be counted.
103 /// \returns An 64-bit integer containing the number of trailing zero bits in
104 ///    the operand.
105 /// \see __tzcnt_u64
106 static __inline__ long long __RELAXED_FN_ATTRS
107 _mm_tzcnt_64(unsigned long long __X)
108 {
109   return (long long)__builtin_ia32_tzcnt_u64(__X);
110 }
111 
/* Single-leading-underscore alias for __tzcnt_u64. */
#define _tzcnt_u64(__a) (__tzcnt_u64(__a))
113 
114 #endif /* __x86_64__ */
115 
116 #undef __RELAXED_FN_ATTRS
117 
118 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
119     defined(__BMI__)
120 
/* Default attributes for the BMI functions defined in this file: forced
   inlining, no debug info, and the "bmi" target feature. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
123 
/* Single-leading-underscore aliases for the 32-bit double-underscore
   intrinsics. _bextr_u32 is intentionally not aliased here: it is not the
   same as __bextr_u32 and is defined as a separate function. */
#define _andn_u32(__a, __b) (__andn_u32((__a), (__b)))
#define _blsi_u32(__a) (__blsi_u32(__a))
#define _blsmsk_u32(__a) (__blsmsk_u32(__a))
#define _blsr_u32(__a) (__blsr_u32(__a))
132 
133 /// Performs a bitwise AND of the second operand with the one's
134 ///    complement of the first operand.
135 ///
136 /// \headerfile <x86intrin.h>
137 ///
138 /// This intrinsic corresponds to the <c> ANDN </c> instruction.
139 ///
140 /// \param __X
141 ///    An unsigned integer containing one of the operands.
142 /// \param __Y
143 ///    An unsigned integer containing one of the operands.
144 /// \returns An unsigned integer containing the bitwise AND of the second
145 ///    operand with the one's complement of the first operand.
146 static __inline__ unsigned int __DEFAULT_FN_ATTRS
147 __andn_u32(unsigned int __X, unsigned int __Y)
148 {
149   return ~__X & __Y;
150 }
151 
152 /* AMD-specified, double-leading-underscore version of BEXTR */
153 /// Extracts the specified bits from the first operand and returns them
154 ///    in the least significant bits of the result.
155 ///
156 /// \headerfile <x86intrin.h>
157 ///
158 /// This intrinsic corresponds to the <c> BEXTR </c> instruction.
159 ///
160 /// \param __X
161 ///    An unsigned integer whose bits are to be extracted.
162 /// \param __Y
163 ///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
164 ///    specify the index of the least significant bit. Bits [15:8] specify the
165 ///    number of bits to be extracted.
166 /// \returns An unsigned integer whose least significant bits contain the
167 ///    extracted bits.
168 /// \see _bextr_u32
169 static __inline__ unsigned int __DEFAULT_FN_ATTRS
170 __bextr_u32(unsigned int __X, unsigned int __Y)
171 {
172   return __builtin_ia32_bextr_u32(__X, __Y);
173 }
174 
175 /* Intel-specified, single-leading-underscore version of BEXTR */
176 /// Extracts the specified bits from the first operand and returns them
177 ///    in the least significant bits of the result.
178 ///
179 /// \headerfile <x86intrin.h>
180 ///
181 /// This intrinsic corresponds to the <c> BEXTR </c> instruction.
182 ///
183 /// \param __X
184 ///    An unsigned integer whose bits are to be extracted.
185 /// \param __Y
186 ///    An unsigned integer used to specify the index of the least significant
187 ///    bit for the bits to be extracted. Bits [7:0] specify the index.
188 /// \param __Z
189 ///    An unsigned integer used to specify the number of bits to be extracted.
190 ///    Bits [7:0] specify the number of bits.
191 /// \returns An unsigned integer whose least significant bits contain the
192 ///    extracted bits.
193 /// \see __bextr_u32
194 static __inline__ unsigned int __DEFAULT_FN_ATTRS
195 _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
196 {
197   return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
198 }
199 
200 /* Intel-specified, single-leading-underscore version of BEXTR2 */
201 /// Extracts the specified bits from the first operand and returns them
202 ///    in the least significant bits of the result.
203 ///
204 /// \headerfile <x86intrin.h>
205 ///
206 /// This intrinsic corresponds to the <c> BEXTR </c> instruction.
207 ///
208 /// \param __X
209 ///    An unsigned integer whose bits are to be extracted.
210 /// \param __Y
211 ///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
212 ///    specify the index of the least significant bit. Bits [15:8] specify the
213 ///    number of bits to be extracted.
214 /// \returns An unsigned integer whose least significant bits contain the
215 ///    extracted bits.
216 /// \see __bextr_u32
217 static __inline__ unsigned int __DEFAULT_FN_ATTRS
218 _bextr2_u32(unsigned int __X, unsigned int __Y) {
219   return __builtin_ia32_bextr_u32(__X, __Y);
220 }
221 
222 /// Clears all bits in the source except for the least significant bit
223 ///    containing a value of 1 and returns the result.
224 ///
225 /// \headerfile <x86intrin.h>
226 ///
227 /// This intrinsic corresponds to the <c> BLSI </c> instruction.
228 ///
229 /// \param __X
230 ///    An unsigned integer whose bits are to be cleared.
231 /// \returns An unsigned integer containing the result of clearing the bits from
232 ///    the source operand.
233 static __inline__ unsigned int __DEFAULT_FN_ATTRS
234 __blsi_u32(unsigned int __X)
235 {
236   return __X & -__X;
237 }
238 
239 /// Creates a mask whose bits are set to 1, using bit 0 up to and
240 ///    including the least significant bit that is set to 1 in the source
241 ///    operand and returns the result.
242 ///
243 /// \headerfile <x86intrin.h>
244 ///
245 /// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
246 ///
247 /// \param __X
248 ///    An unsigned integer used to create the mask.
249 /// \returns An unsigned integer containing the newly created mask.
250 static __inline__ unsigned int __DEFAULT_FN_ATTRS
251 __blsmsk_u32(unsigned int __X)
252 {
253   return __X ^ (__X - 1);
254 }
255 
256 /// Clears the least significant bit that is set to 1 in the source
257 ///    operand and returns the result.
258 ///
259 /// \headerfile <x86intrin.h>
260 ///
261 /// This intrinsic corresponds to the <c> BLSR </c> instruction.
262 ///
263 /// \param __X
264 ///    An unsigned integer containing the operand to be cleared.
265 /// \returns An unsigned integer containing the result of clearing the source
266 ///    operand.
267 static __inline__ unsigned int __DEFAULT_FN_ATTRS
268 __blsr_u32(unsigned int __X)
269 {
270   return __X & (__X - 1);
271 }
272 
273 #ifdef __x86_64__
274 
/* Single-leading-underscore aliases for the 64-bit double-underscore
   intrinsics. _bextr_u64 is intentionally not aliased here: it is not the
   same as __bextr_u64 and is defined as a separate function. */
#define _andn_u64(__a, __b) (__andn_u64((__a), (__b)))
#define _blsi_u64(__a) (__blsi_u64(__a))
#define _blsmsk_u64(__a) (__blsmsk_u64(__a))
#define _blsr_u64(__a) (__blsr_u64(__a))
283 
284 /// Performs a bitwise AND of the second operand with the one's
285 ///    complement of the first operand.
286 ///
287 /// \headerfile <x86intrin.h>
288 ///
289 /// This intrinsic corresponds to the <c> ANDN </c> instruction.
290 ///
291 /// \param __X
292 ///    An unsigned 64-bit integer containing one of the operands.
293 /// \param __Y
294 ///    An unsigned 64-bit integer containing one of the operands.
295 /// \returns An unsigned 64-bit integer containing the bitwise AND of the second
296 ///    operand with the one's complement of the first operand.
297 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
298 __andn_u64 (unsigned long long __X, unsigned long long __Y)
299 {
300   return ~__X & __Y;
301 }
302 
303 /* AMD-specified, double-leading-underscore version of BEXTR */
304 /// Extracts the specified bits from the first operand and returns them
305 ///    in the least significant bits of the result.
306 ///
307 /// \headerfile <x86intrin.h>
308 ///
309 /// This intrinsic corresponds to the <c> BEXTR </c> instruction.
310 ///
311 /// \param __X
312 ///    An unsigned 64-bit integer whose bits are to be extracted.
313 /// \param __Y
314 ///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
315 ///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
316 ///    the number of bits to be extracted.
317 /// \returns An unsigned 64-bit integer whose least significant bits contain the
318 ///    extracted bits.
319 /// \see _bextr_u64
320 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
321 __bextr_u64(unsigned long long __X, unsigned long long __Y)
322 {
323   return __builtin_ia32_bextr_u64(__X, __Y);
324 }
325 
326 /* Intel-specified, single-leading-underscore version of BEXTR */
327 /// Extracts the specified bits from the first operand and returns them
328 ///     in the least significant bits of the result.
329 ///
330 /// \headerfile <x86intrin.h>
331 ///
332 /// This intrinsic corresponds to the <c> BEXTR </c> instruction.
333 ///
334 /// \param __X
335 ///    An unsigned 64-bit integer whose bits are to be extracted.
336 /// \param __Y
337 ///    An unsigned integer used to specify the index of the least significant
338 ///    bit for the bits to be extracted. Bits [7:0] specify the index.
339 /// \param __Z
340 ///    An unsigned integer used to specify the number of bits to be extracted.
341 ///    Bits [7:0] specify the number of bits.
342 /// \returns An unsigned 64-bit integer whose least significant bits contain the
343 ///    extracted bits.
344 /// \see __bextr_u64
345 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
346 _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
347 {
348   return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
349 }
350 
351 /* Intel-specified, single-leading-underscore version of BEXTR2 */
352 /// Extracts the specified bits from the first operand and returns them
353 ///    in the least significant bits of the result.
354 ///
355 /// \headerfile <x86intrin.h>
356 ///
357 /// This intrinsic corresponds to the <c> BEXTR </c> instruction.
358 ///
359 /// \param __X
360 ///    An unsigned 64-bit integer whose bits are to be extracted.
361 /// \param __Y
362 ///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
363 ///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
364 ///    the number of bits to be extracted.
365 /// \returns An unsigned 64-bit integer whose least significant bits contain the
366 ///    extracted bits.
367 /// \see __bextr_u64
368 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
369 _bextr2_u64(unsigned long long __X, unsigned long long __Y) {
370   return __builtin_ia32_bextr_u64(__X, __Y);
371 }
372 
373 /// Clears all bits in the source except for the least significant bit
374 ///    containing a value of 1 and returns the result.
375 ///
376 /// \headerfile <x86intrin.h>
377 ///
378 /// This intrinsic corresponds to the <c> BLSI </c> instruction.
379 ///
380 /// \param __X
381 ///    An unsigned 64-bit integer whose bits are to be cleared.
382 /// \returns An unsigned 64-bit integer containing the result of clearing the
383 ///    bits from the source operand.
384 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
385 __blsi_u64(unsigned long long __X)
386 {
387   return __X & -__X;
388 }
389 
390 /// Creates a mask whose bits are set to 1, using bit 0 up to and
391 ///    including the least significant bit that is set to 1 in the source
392 ///    operand and returns the result.
393 ///
394 /// \headerfile <x86intrin.h>
395 ///
396 /// This intrinsic corresponds to the <c> BLSMSK </c> instruction.
397 ///
398 /// \param __X
399 ///    An unsigned 64-bit integer used to create the mask.
400 /// \returns An unsigned 64-bit integer containing the newly created mask.
401 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
402 __blsmsk_u64(unsigned long long __X)
403 {
404   return __X ^ (__X - 1);
405 }
406 
407 /// Clears the least significant bit that is set to 1 in the source
408 ///    operand and returns the result.
409 ///
410 /// \headerfile <x86intrin.h>
411 ///
412 /// This intrinsic corresponds to the <c> BLSR </c> instruction.
413 ///
414 /// \param __X
415 ///    An unsigned 64-bit integer containing the operand to be cleared.
416 /// \returns An unsigned 64-bit integer containing the result of clearing the
417 ///    source operand.
418 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
419 __blsr_u64(unsigned long long __X)
420 {
421   return __X & (__X - 1);
422 }
423 
424 #endif /* __x86_64__ */
425 
426 #undef __DEFAULT_FN_ATTRS
427 
428 #endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules)   \
429           || defined(__BMI__) */
430 
431 #endif /* __BMIINTRIN_H */
432