1 /*===---- bmiintrin.h - BMI intrinsics -------------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
11 #error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
12 #endif
13 
14 #ifndef __BMIINTRIN_H
15 #define __BMIINTRIN_H
16 
/* The tzcnt intrinsics are intentionally usable without the BMI target
   feature: on non-BMI targets the TZCNT instruction behaves as BSF, and
   existing code relies on that to use TZCNT as a potentially faster BSF.
   These attributes therefore carry no __target__("bmi"). */
#define __RELAXED_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__))
21 
22 /// Counts the number of trailing zero bits in the operand.
23 ///
24 /// \headerfile <x86intrin.h>
25 ///
26 /// This intrinsic corresponds to the \c TZCNT instruction.
27 ///
28 /// \param __X
29 ///    An unsigned 16-bit integer whose trailing zeros are to be counted.
30 /// \returns An unsigned 16-bit integer containing the number of trailing zero
31 ///    bits in the operand.
32 /// \see _tzcnt_u16
33 static __inline__ unsigned short __RELAXED_FN_ATTRS
34 __tzcnt_u16(unsigned short __X)
35 {
36   return __builtin_ia32_tzcnt_u16(__X);
37 }
38 
/// Returns the number of consecutive zero bits at the least significant end
///    of a 16-bit operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned short _tzcnt_u16(unsigned short __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction. It is provided as
/// a macro that expands to \c __tzcnt_u16.
///
/// \param __X
///    The unsigned 16-bit integer to examine.
/// \returns The number of trailing zero bits in \a __X, as an unsigned 16-bit
///    integer.
/// \see __tzcnt_u16
#define _tzcnt_u16 __tzcnt_u16
55 
56 /// Counts the number of trailing zero bits in the operand.
57 ///
58 /// \headerfile <x86intrin.h>
59 ///
60 /// This intrinsic corresponds to the \c TZCNT instruction.
61 ///
62 /// \param __X
63 ///    An unsigned 32-bit integer whose trailing zeros are to be counted.
64 /// \returns An unsigned 32-bit integer containing the number of trailing zero
65 ///    bits in the operand.
66 /// \see { _mm_tzcnt_32 _tzcnt_u32 }
67 static __inline__ unsigned int __RELAXED_FN_ATTRS
68 __tzcnt_u32(unsigned int __X)
69 {
70   return __builtin_ia32_tzcnt_u32(__X);
71 }
72 
73 /// Counts the number of trailing zero bits in the operand.
74 ///
75 /// \headerfile <x86intrin.h>
76 ///
77 /// This intrinsic corresponds to the \c TZCNT instruction.
78 ///
79 /// \param __X
80 ///    An unsigned 32-bit integer whose trailing zeros are to be counted.
81 /// \returns A 32-bit integer containing the number of trailing zero bits in
82 ///    the operand.
83 /// \see { __tzcnt_u32 _tzcnt_u32 }
84 static __inline__ int __RELAXED_FN_ATTRS
85 _mm_tzcnt_32(unsigned int __X)
86 {
87   return (int)__builtin_ia32_tzcnt_u32(__X);
88 }
89 
/// Returns the number of consecutive zero bits at the least significant end
///    of a 32-bit operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _tzcnt_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction. It is provided as
/// a macro that expands to \c __tzcnt_u32.
///
/// \param __X
///    The unsigned 32-bit integer to examine.
/// \returns The number of trailing zero bits in \a __X, as an unsigned 32-bit
///    integer.
/// \see { _mm_tzcnt_32 __tzcnt_u32 }
#define _tzcnt_u32 __tzcnt_u32
106 
107 #ifdef __x86_64__
108 
109 /// Counts the number of trailing zero bits in the operand.
110 ///
111 /// \headerfile <x86intrin.h>
112 ///
113 /// This intrinsic corresponds to the \c TZCNT instruction.
114 ///
115 /// \param __X
116 ///    An unsigned 64-bit integer whose trailing zeros are to be counted.
117 /// \returns An unsigned 64-bit integer containing the number of trailing zero
118 ///    bits in the operand.
119 /// \see { _mm_tzcnt_64 _tzcnt_u64 }
120 static __inline__ unsigned long long __RELAXED_FN_ATTRS
121 __tzcnt_u64(unsigned long long __X)
122 {
123   return __builtin_ia32_tzcnt_u64(__X);
124 }
125 
126 /// Counts the number of trailing zero bits in the operand.
127 ///
128 /// \headerfile <x86intrin.h>
129 ///
130 /// This intrinsic corresponds to the \c TZCNT instruction.
131 ///
132 /// \param __X
133 ///    An unsigned 64-bit integer whose trailing zeros are to be counted.
134 /// \returns An 64-bit integer containing the number of trailing zero bits in
135 ///    the operand.
136 /// \see { __tzcnt_u64 _tzcnt_u64 }
137 static __inline__ long long __RELAXED_FN_ATTRS
138 _mm_tzcnt_64(unsigned long long __X)
139 {
140   return (long long)__builtin_ia32_tzcnt_u64(__X);
141 }
142 
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _tzcnt_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction. It is provided as
/// a macro that expands to \c __tzcnt_u64.
///
/// \param __X
///    An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
///    bits in the operand.
/// \see { _mm_tzcnt_64 __tzcnt_u64 }
#define _tzcnt_u64 __tzcnt_u64
159 
160 #endif /* __x86_64__ */
161 
162 #undef __RELAXED_FN_ATTRS
163 
164 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
165     defined(__BMI__)
166 
/* Attributes applied to the BMI intrinsics defined below, until the matching
   #undef: always inline, no debug info, and the "bmi" target feature. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
169 
170 /// Performs a bitwise AND of the second operand with the one's
171 ///    complement of the first operand.
172 ///
173 /// \headerfile <x86intrin.h>
174 ///
175 /// This intrinsic corresponds to the \c ANDN instruction.
176 ///
177 /// \param __X
178 ///    An unsigned integer containing one of the operands.
179 /// \param __Y
180 ///    An unsigned integer containing one of the operands.
181 /// \returns An unsigned integer containing the bitwise AND of the second
182 ///    operand with the one's complement of the first operand.
183 /// \see _andn_u32
184 static __inline__ unsigned int __DEFAULT_FN_ATTRS
185 __andn_u32(unsigned int __X, unsigned int __Y)
186 {
187   return ~__X & __Y;
188 }
189 
/// Computes the bitwise AND of the second operand with the one's complement
///    of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _andn_u32(unsigned int __X, unsigned int __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction. It is provided as
/// a macro that expands to \c __andn_u32.
///
/// \param __X
///    The unsigned integer that is complemented before the AND.
/// \param __Y
///    The unsigned integer that is ANDed with the complement of \a __X.
/// \returns An unsigned integer containing the bitwise AND of the second
///    operand with the one's complement of the first operand.
/// \see __andn_u32
#define _andn_u32 __andn_u32
209 
210 /* AMD-specified, double-leading-underscore version of BEXTR */
211 /// Extracts the specified bits from the first operand and returns them
212 ///    in the least significant bits of the result.
213 ///
214 /// \headerfile <x86intrin.h>
215 ///
216 /// This intrinsic corresponds to the \c BEXTR instruction.
217 ///
218 /// \param __X
219 ///    An unsigned integer whose bits are to be extracted.
220 /// \param __Y
221 ///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
222 ///    specify the index of the least significant bit. Bits [15:8] specify the
223 ///    number of bits to be extracted.
224 /// \returns An unsigned integer whose least significant bits contain the
225 ///    extracted bits.
226 /// \see _bextr_u32
227 static __inline__ unsigned int __DEFAULT_FN_ATTRS
228 __bextr_u32(unsigned int __X, unsigned int __Y)
229 {
230   return __builtin_ia32_bextr_u32(__X, __Y);
231 }
232 
233 /* Intel-specified, single-leading-underscore version of BEXTR */
234 /// Extracts the specified bits from the first operand and returns them
235 ///    in the least significant bits of the result.
236 ///
237 /// \headerfile <x86intrin.h>
238 ///
239 /// This intrinsic corresponds to the \c BEXTR instruction.
240 ///
241 /// \param __X
242 ///    An unsigned integer whose bits are to be extracted.
243 /// \param __Y
244 ///    An unsigned integer used to specify the index of the least significant
245 ///    bit for the bits to be extracted. Bits [7:0] specify the index.
246 /// \param __Z
247 ///    An unsigned integer used to specify the number of bits to be extracted.
248 ///    Bits [7:0] specify the number of bits.
249 /// \returns An unsigned integer whose least significant bits contain the
250 ///    extracted bits.
251 /// \see __bextr_u32
252 static __inline__ unsigned int __DEFAULT_FN_ATTRS
253 _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
254 {
255   return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
256 }
257 
258 /* Intel-specified, single-leading-underscore version of BEXTR2 */
259 /// Extracts the specified bits from the first operand and returns them
260 ///    in the least significant bits of the result.
261 ///
262 /// \headerfile <x86intrin.h>
263 ///
264 /// This intrinsic corresponds to the \c BEXTR instruction.
265 ///
266 /// \param __X
267 ///    An unsigned integer whose bits are to be extracted.
268 /// \param __Y
269 ///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
270 ///    specify the index of the least significant bit. Bits [15:8] specify the
271 ///    number of bits to be extracted.
272 /// \returns An unsigned integer whose least significant bits contain the
273 ///    extracted bits.
274 /// \see __bextr_u32
275 static __inline__ unsigned int __DEFAULT_FN_ATTRS
276 _bextr2_u32(unsigned int __X, unsigned int __Y) {
277   return __builtin_ia32_bextr_u32(__X, __Y);
278 }
279 
280 /// Clears all bits in the source except for the least significant bit
281 ///    containing a value of 1 and returns the result.
282 ///
283 /// \headerfile <x86intrin.h>
284 ///
285 /// This intrinsic corresponds to the \c BLSI instruction.
286 ///
287 /// \param __X
288 ///    An unsigned integer whose bits are to be cleared.
289 /// \returns An unsigned integer containing the result of clearing the bits from
290 ///    the source operand.
291 /// \see _blsi_u32
292 static __inline__ unsigned int __DEFAULT_FN_ATTRS
293 __blsi_u32(unsigned int __X)
294 {
295   return __X & -__X;
296 }
297 
/// Isolates the lowest set bit of the source: every bit is cleared except the
///    least significant bit that has a value of 1.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsi_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction. It is provided as
/// a macro that expands to \c __blsi_u32.
///
/// \param __X
///    The unsigned integer whose lowest set bit is isolated.
/// \returns An unsigned integer containing the result of clearing the bits
///    from the source operand.
/// \see __blsi_u32
#define _blsi_u32 __blsi_u32
315 
316 /// Creates a mask whose bits are set to 1, using bit 0 up to and
317 ///    including the least significant bit that is set to 1 in the source
318 ///    operand and returns the result.
319 ///
320 /// \headerfile <x86intrin.h>
321 ///
322 /// This intrinsic corresponds to the \c BLSMSK instruction.
323 ///
324 /// \param __X
325 ///    An unsigned integer used to create the mask.
326 /// \returns An unsigned integer containing the newly created mask.
327 /// \see _blsmsk_u32
328 static __inline__ unsigned int __DEFAULT_FN_ATTRS
329 __blsmsk_u32(unsigned int __X)
330 {
331   return __X ^ (__X - 1);
332 }
333 
/// Builds a mask with bits set from bit 0 up to and including the least
///    significant bit that is set to 1 in the source operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsmsk_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction. It is provided as
/// a macro that expands to \c __blsmsk_u32.
///
/// \param __X
///    The unsigned integer from which the mask is derived.
/// \returns An unsigned integer containing the newly created mask.
/// \see __blsmsk_u32
#define _blsmsk_u32 __blsmsk_u32
351 
352 /// Clears the least significant bit that is set to 1 in the source
353 ///    operand and returns the result.
354 ///
355 /// \headerfile <x86intrin.h>
356 ///
357 /// This intrinsic corresponds to the \c BLSR instruction.
358 ///
359 /// \param __X
360 ///    An unsigned integer containing the operand to be cleared.
361 /// \returns An unsigned integer containing the result of clearing the source
362 ///    operand.
363 /// \see _blsr_u32
364 static __inline__ unsigned int __DEFAULT_FN_ATTRS
365 __blsr_u32(unsigned int __X)
366 {
367   return __X & (__X - 1);
368 }
369 
/// Clears the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsr_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction. It is provided as
/// a macro that expands to \c __blsr_u32.
///
/// \param __X
///    An unsigned integer containing the operand to be cleared.
/// \returns An unsigned integer containing the result of clearing the source
///    operand.
/// \see __blsr_u32
#define _blsr_u32 __blsr_u32
387 
388 #ifdef __x86_64__
389 
390 /// Performs a bitwise AND of the second operand with the one's
391 ///    complement of the first operand.
392 ///
393 /// \headerfile <x86intrin.h>
394 ///
395 /// This intrinsic corresponds to the \c ANDN instruction.
396 ///
397 /// \param __X
398 ///    An unsigned 64-bit integer containing one of the operands.
399 /// \param __Y
400 ///    An unsigned 64-bit integer containing one of the operands.
401 /// \returns An unsigned 64-bit integer containing the bitwise AND of the second
402 ///    operand with the one's complement of the first operand.
403 /// \see _andn_u64
404 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
405 __andn_u64 (unsigned long long __X, unsigned long long __Y)
406 {
407   return ~__X & __Y;
408 }
409 
/// Computes the bitwise AND of the second operand with the one's complement
///    of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _andn_u64(unsigned long long __X,
///                              unsigned long long __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction. It is provided as
/// a macro that expands to \c __andn_u64.
///
/// \param __X
///    The unsigned 64-bit integer that is complemented before the AND.
/// \param __Y
///    The unsigned 64-bit integer that is ANDed with the complement of
///    \a __X.
/// \returns An unsigned 64-bit integer containing the bitwise AND of the
///    second operand with the one's complement of the first operand.
/// \see __andn_u64
#define _andn_u64 __andn_u64
430 
431 /* AMD-specified, double-leading-underscore version of BEXTR */
432 /// Extracts the specified bits from the first operand and returns them
433 ///    in the least significant bits of the result.
434 ///
435 /// \headerfile <x86intrin.h>
436 ///
437 /// This intrinsic corresponds to the \c BEXTR instruction.
438 ///
439 /// \param __X
440 ///    An unsigned 64-bit integer whose bits are to be extracted.
441 /// \param __Y
442 ///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
443 ///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
444 ///    the number of bits to be extracted.
445 /// \returns An unsigned 64-bit integer whose least significant bits contain the
446 ///    extracted bits.
447 /// \see _bextr_u64
448 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
449 __bextr_u64(unsigned long long __X, unsigned long long __Y)
450 {
451   return __builtin_ia32_bextr_u64(__X, __Y);
452 }
453 
454 /* Intel-specified, single-leading-underscore version of BEXTR */
455 /// Extracts the specified bits from the first operand and returns them
456 ///     in the least significant bits of the result.
457 ///
458 /// \headerfile <x86intrin.h>
459 ///
460 /// This intrinsic corresponds to the \c BEXTR instruction.
461 ///
462 /// \param __X
463 ///    An unsigned 64-bit integer whose bits are to be extracted.
464 /// \param __Y
465 ///    An unsigned integer used to specify the index of the least significant
466 ///    bit for the bits to be extracted. Bits [7:0] specify the index.
467 /// \param __Z
468 ///    An unsigned integer used to specify the number of bits to be extracted.
469 ///    Bits [7:0] specify the number of bits.
470 /// \returns An unsigned 64-bit integer whose least significant bits contain the
471 ///    extracted bits.
472 /// \see __bextr_u64
473 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
474 _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
475 {
476   return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
477 }
478 
479 /* Intel-specified, single-leading-underscore version of BEXTR2 */
480 /// Extracts the specified bits from the first operand and returns them
481 ///    in the least significant bits of the result.
482 ///
483 /// \headerfile <x86intrin.h>
484 ///
485 /// This intrinsic corresponds to the \c BEXTR instruction.
486 ///
487 /// \param __X
488 ///    An unsigned 64-bit integer whose bits are to be extracted.
489 /// \param __Y
490 ///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
491 ///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
492 ///    the number of bits to be extracted.
493 /// \returns An unsigned 64-bit integer whose least significant bits contain the
494 ///    extracted bits.
495 /// \see __bextr_u64
496 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
497 _bextr2_u64(unsigned long long __X, unsigned long long __Y) {
498   return __builtin_ia32_bextr_u64(__X, __Y);
499 }
500 
501 /// Clears all bits in the source except for the least significant bit
502 ///    containing a value of 1 and returns the result.
503 ///
504 /// \headerfile <x86intrin.h>
505 ///
506 /// This intrinsic corresponds to the \c BLSI instruction.
507 ///
508 /// \param __X
509 ///    An unsigned 64-bit integer whose bits are to be cleared.
510 /// \returns An unsigned 64-bit integer containing the result of clearing the
511 ///    bits from the source operand.
512 /// \see _blsi_u64
513 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
514 __blsi_u64(unsigned long long __X)
515 {
516   return __X & -__X;
517 }
518 
/// Isolates the lowest set bit of the source: every bit is cleared except the
///    least significant bit that has a value of 1.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsi_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction. It is provided as
/// a macro that expands to \c __blsi_u64.
///
/// \param __X
///    The unsigned 64-bit integer whose lowest set bit is isolated.
/// \returns An unsigned 64-bit integer containing the result of clearing the
///    bits from the source operand.
/// \see __blsi_u64
#define _blsi_u64 __blsi_u64
536 
537 /// Creates a mask whose bits are set to 1, using bit 0 up to and
538 ///    including the least significant bit that is set to 1 in the source
539 ///    operand and returns the result.
540 ///
541 /// \headerfile <x86intrin.h>
542 ///
543 /// This intrinsic corresponds to the \c BLSMSK instruction.
544 ///
545 /// \param __X
546 ///    An unsigned 64-bit integer used to create the mask.
547 /// \returns An unsigned 64-bit integer containing the newly created mask.
548 /// \see _blsmsk_u64
549 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
550 __blsmsk_u64(unsigned long long __X)
551 {
552   return __X ^ (__X - 1);
553 }
554 
/// Builds a mask with bits set from bit 0 up to and including the least
///    significant bit that is set to 1 in the source operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsmsk_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction. It is provided as
/// a macro that expands to \c __blsmsk_u64.
///
/// \param __X
///    The unsigned 64-bit integer from which the mask is derived.
/// \returns An unsigned 64-bit integer containing the newly created mask.
/// \see __blsmsk_u64
#define _blsmsk_u64 __blsmsk_u64
572 
573 /// Clears the least significant bit that is set to 1 in the source
574 ///    operand and returns the result.
575 ///
576 /// \headerfile <x86intrin.h>
577 ///
578 /// This intrinsic corresponds to the \c BLSR instruction.
579 ///
580 /// \param __X
581 ///    An unsigned 64-bit integer containing the operand to be cleared.
582 /// \returns An unsigned 64-bit integer containing the result of clearing the
583 ///    source operand.
584 /// \see _blsr_u64
585 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
586 __blsr_u64(unsigned long long __X)
587 {
588   return __X & (__X - 1);
589 }
590 
/// Resets the least significant bit that is set to 1 in the source operand,
///    leaving all other bits unchanged.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsr_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction. It is provided as
/// a macro that expands to \c __blsr_u64.
///
/// \param __X
///    The unsigned 64-bit integer whose lowest set bit is cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
///    source operand.
/// \see __blsr_u64
#define _blsr_u64 __blsr_u64
608 
609 #endif /* __x86_64__ */
610 
611 #undef __DEFAULT_FN_ATTRS
612 
613 #endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules)   \
614           || defined(__BMI__) */
615 
616 #endif /* __BMIINTRIN_H */
617