/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H
110b57cec5SDimitry Andric #error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
120b57cec5SDimitry Andric #endif
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #ifndef __VBMIVLINTRIN_H
150b57cec5SDimitry Andric #define __VBMIVLINTRIN_H
160b57cec5SDimitry Andric 
/* Define the default attributes for the functions in this file.
 *
 * Both macros apply always_inline and nodebug and select the target string
 * "avx512vbmi,avx512vl,no-evex512". They differ only in the value given to
 * min_vector_width: 128 for the __m128i intrinsics, 256 for the __m256i ones.
 */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vbmi,avx512vl,no-evex512"),                 \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vbmi,avx512vl,no-evex512"),                 \
                 __min_vector_width__(256)))

270b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_permutex2var_epi8(__m128i __A,__m128i __I,__m128i __B)280b57cec5SDimitry Andric _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
290b57cec5SDimitry Andric {
300b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
310b57cec5SDimitry Andric                                                  (__v16qi)__I,
320b57cec5SDimitry Andric                                                  (__v16qi)__B);
330b57cec5SDimitry Andric }
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_permutex2var_epi8(__m128i __A,__mmask16 __U,__m128i __I,__m128i __B)360b57cec5SDimitry Andric _mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
370b57cec5SDimitry Andric                            __m128i __B)
380b57cec5SDimitry Andric {
390b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectb_128(__U,
400b57cec5SDimitry Andric                                   (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
410b57cec5SDimitry Andric                                   (__v16qi)__A);
420b57cec5SDimitry Andric }
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask2_permutex2var_epi8(__m128i __A,__m128i __I,__mmask16 __U,__m128i __B)450b57cec5SDimitry Andric _mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
460b57cec5SDimitry Andric                             __m128i __B)
470b57cec5SDimitry Andric {
480b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectb_128(__U,
490b57cec5SDimitry Andric                                   (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
500b57cec5SDimitry Andric                                   (__v16qi)__I);
510b57cec5SDimitry Andric }
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_permutex2var_epi8(__mmask16 __U,__m128i __A,__m128i __I,__m128i __B)540b57cec5SDimitry Andric _mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
550b57cec5SDimitry Andric                             __m128i __B)
560b57cec5SDimitry Andric {
570b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectb_128(__U,
580b57cec5SDimitry Andric                                   (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
590b57cec5SDimitry Andric                                   (__v16qi)_mm_setzero_si128());
600b57cec5SDimitry Andric }
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_permutex2var_epi8(__m256i __A,__m256i __I,__m256i __B)630b57cec5SDimitry Andric _mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B)
640b57cec5SDimitry Andric {
650b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
660b57cec5SDimitry Andric                                                  (__v32qi)__B);
670b57cec5SDimitry Andric }
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_permutex2var_epi8(__m256i __A,__mmask32 __U,__m256i __I,__m256i __B)700b57cec5SDimitry Andric _mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
710b57cec5SDimitry Andric                               __m256i __B)
720b57cec5SDimitry Andric {
730b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectb_256(__U,
740b57cec5SDimitry Andric                                (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
750b57cec5SDimitry Andric                                (__v32qi)__A);
760b57cec5SDimitry Andric }
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask2_permutex2var_epi8(__m256i __A,__m256i __I,__mmask32 __U,__m256i __B)790b57cec5SDimitry Andric _mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
800b57cec5SDimitry Andric                                __m256i __B)
810b57cec5SDimitry Andric {
820b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectb_256(__U,
830b57cec5SDimitry Andric                                (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
840b57cec5SDimitry Andric                                (__v32qi)__I);
850b57cec5SDimitry Andric }
860b57cec5SDimitry Andric 
870b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_permutex2var_epi8(__mmask32 __U,__m256i __A,__m256i __I,__m256i __B)880b57cec5SDimitry Andric _mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
890b57cec5SDimitry Andric                                __m256i __B)
900b57cec5SDimitry Andric {
910b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectb_256(__U,
920b57cec5SDimitry Andric                                (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
930b57cec5SDimitry Andric                                (__v32qi)_mm256_setzero_si256());
940b57cec5SDimitry Andric }
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_permutexvar_epi8(__m128i __A,__m128i __B)970b57cec5SDimitry Andric _mm_permutexvar_epi8 (__m128i __A, __m128i __B)
980b57cec5SDimitry Andric {
990b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
1000b57cec5SDimitry Andric }
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_permutexvar_epi8(__mmask16 __M,__m128i __A,__m128i __B)1030b57cec5SDimitry Andric _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
1040b57cec5SDimitry Andric {
1050b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1060b57cec5SDimitry Andric                                         (__v16qi)_mm_permutexvar_epi8(__A, __B),
1070b57cec5SDimitry Andric                                         (__v16qi)_mm_setzero_si128());
1080b57cec5SDimitry Andric }
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_permutexvar_epi8(__m128i __W,__mmask16 __M,__m128i __A,__m128i __B)1110b57cec5SDimitry Andric _mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
1120b57cec5SDimitry Andric           __m128i __B)
1130b57cec5SDimitry Andric {
1140b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1150b57cec5SDimitry Andric                                         (__v16qi)_mm_permutexvar_epi8(__A, __B),
1160b57cec5SDimitry Andric                                         (__v16qi)__W);
1170b57cec5SDimitry Andric }
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_permutexvar_epi8(__m256i __A,__m256i __B)1200b57cec5SDimitry Andric _mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
1210b57cec5SDimitry Andric {
1220b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
1230b57cec5SDimitry Andric }
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_permutexvar_epi8(__mmask32 __M,__m256i __A,__m256i __B)1260b57cec5SDimitry Andric _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
1270b57cec5SDimitry Andric         __m256i __B)
1280b57cec5SDimitry Andric {
1290b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1300b57cec5SDimitry Andric                                      (__v32qi)_mm256_permutexvar_epi8(__A, __B),
1310b57cec5SDimitry Andric                                      (__v32qi)_mm256_setzero_si256());
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_permutexvar_epi8(__m256i __W,__mmask32 __M,__m256i __A,__m256i __B)1350b57cec5SDimitry Andric _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
1360b57cec5SDimitry Andric              __m256i __B)
1370b57cec5SDimitry Andric {
1380b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1390b57cec5SDimitry Andric                                      (__v32qi)_mm256_permutexvar_epi8(__A, __B),
1400b57cec5SDimitry Andric                                      (__v32qi)__W);
1410b57cec5SDimitry Andric }
1420b57cec5SDimitry Andric 
1430b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_multishift_epi64_epi8(__m128i __X,__m128i __Y)1440b57cec5SDimitry Andric _mm_multishift_epi64_epi8(__m128i __X, __m128i __Y)
1450b57cec5SDimitry Andric {
1460b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y);
1470b57cec5SDimitry Andric }
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_multishift_epi64_epi8(__m128i __W,__mmask16 __M,__m128i __X,__m128i __Y)1500b57cec5SDimitry Andric _mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X,
1510b57cec5SDimitry Andric                                __m128i __Y)
1520b57cec5SDimitry Andric {
1530b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1540b57cec5SDimitry Andric                                    (__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
1550b57cec5SDimitry Andric                                    (__v16qi)__W);
1560b57cec5SDimitry Andric }
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_multishift_epi64_epi8(__mmask16 __M,__m128i __X,__m128i __Y)1590b57cec5SDimitry Andric _mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y)
1600b57cec5SDimitry Andric {
1610b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
1620b57cec5SDimitry Andric                                    (__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
1630b57cec5SDimitry Andric                                    (__v16qi)_mm_setzero_si128());
1640b57cec5SDimitry Andric }
1650b57cec5SDimitry Andric 
1660b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_multishift_epi64_epi8(__m256i __X,__m256i __Y)1670b57cec5SDimitry Andric _mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y)
1680b57cec5SDimitry Andric {
1690b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y);
1700b57cec5SDimitry Andric }
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_multishift_epi64_epi8(__m256i __W,__mmask32 __M,__m256i __X,__m256i __Y)1730b57cec5SDimitry Andric _mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X,
1740b57cec5SDimitry Andric                                   __m256i __Y)
1750b57cec5SDimitry Andric {
1760b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1770b57cec5SDimitry Andric                                 (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
1780b57cec5SDimitry Andric                                 (__v32qi)__W);
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric 
1810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_multishift_epi64_epi8(__mmask32 __M,__m256i __X,__m256i __Y)1820b57cec5SDimitry Andric _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
1830b57cec5SDimitry Andric {
1840b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
1850b57cec5SDimitry Andric                                 (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
1860b57cec5SDimitry Andric                                 (__v32qi)_mm256_setzero_si256());
1870b57cec5SDimitry Andric }
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric 
1900b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128
1910b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric #endif
194