/* Copyright (C) 2013-2021 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512vnnivlintrin.h> directly; include <immintrin.h> instead."
26 #endif
27 
28 #ifndef _AVX512VNNIVLINTRIN_H_INCLUDED
29 #define _AVX512VNNIVLINTRIN_H_INCLUDED
30 
31 #if !defined(__AVX512VL__) || !defined(__AVX512VNNI__)
32 #pragma GCC push_options
33 #pragma GCC target("avx512vnni,avx512vl")
34 #define __DISABLE_AVX512VNNIVL__
35 #endif /* __AVX512VNNIVL__ */
36 
/* __m256i form of the vpdpbusd builtin, without a mask.  Expands to
   __builtin_ia32_vpdpbusd_v8si (A, B, C) with each argument cast to
   __v8si and the result cast to __m256i.  Each argument is expanded
   exactly once.
   NOTE(review): element-level semantics come from the builtin and are
   not visible here -- see Intel's VPDPBUSD documentation.  */
#define _mm256_dpbusd_epi32(A, B, C)                              \
  ((__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) (A),          \
                                           (__v8si) (B),          \
                                           (__v8si) (C)))

42 extern __inline __m256i
43 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpbusd_epi32(__m256i __A,__mmask8 __B,__m256i __C,__m256i __D)44 _mm256_mask_dpbusd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
45 {
46   return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask ((__v8si)__A, (__v8si) __C,
47 						(__v8si) __D, (__mmask8)__B);
48 }
49 
50 extern __inline __m256i
51 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpbusd_epi32(__mmask8 __A,__m256i __B,__m256i __C,__m256i __D)52 _mm256_maskz_dpbusd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
53 {
54   return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz ((__v8si)__B,
55 				(__v8si) __C, (__v8si) __D, (__mmask8)__A);
56 }
57 
/* __m128i form of the vpdpbusd builtin, without a mask.  Expands to
   __builtin_ia32_vpdpbusd_v4si (A, B, C) with each argument cast to
   __v4si and the result cast to __m128i.  Each argument is expanded
   exactly once.
   NOTE(review): element-level semantics come from the builtin and are
   not visible here -- see Intel's VPDPBUSD documentation.  */
#define _mm_dpbusd_epi32(A, B, C)                                 \
  ((__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) (A),          \
                                           (__v4si) (B),          \
                                           (__v4si) (C)))

63 extern __inline __m128i
64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpbusd_epi32(__m128i __A,__mmask8 __B,__m128i __C,__m128i __D)65 _mm_mask_dpbusd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
66 {
67   return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask ((__v4si)__A, (__v4si) __C,
68 						(__v4si) __D, (__mmask8)__B);
69 }
70 
71 extern __inline __m128i
72 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpbusd_epi32(__mmask8 __A,__m128i __B,__m128i __C,__m128i __D)73 _mm_maskz_dpbusd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
74 {
75   return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz ((__v4si)__B,
76 				(__v4si) __C, (__v4si) __D, (__mmask8)__A);
77 }
78 
/* __m256i form of the vpdpbusds builtin, without a mask.  Expands to
   __builtin_ia32_vpdpbusds_v8si (A, B, C) with each argument cast to
   __v8si and the result cast to __m256i.  Each argument is expanded
   exactly once.
   NOTE(review): the trailing "s" presumably selects the saturating
   variant of vpdpbusd -- confirm against Intel's documentation.  */
#define _mm256_dpbusds_epi32(A, B, C)                             \
  ((__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) (A),         \
                                            (__v8si) (B),         \
                                            (__v8si) (C)))

84 extern __inline __m256i
85 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpbusds_epi32(__m256i __A,__mmask8 __B,__m256i __C,__m256i __D)86 _mm256_mask_dpbusds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
87 {
88   return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask ((__v8si)__A,
89 				(__v8si) __C, (__v8si) __D, (__mmask8)__B);
90 }
91 
92 extern __inline __m256i
93 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpbusds_epi32(__mmask8 __A,__m256i __B,__m256i __C,__m256i __D)94 _mm256_maskz_dpbusds_epi32 (__mmask8 __A, __m256i __B, __m256i __C,
95 								__m256i __D)
96 {
97   return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz ((__v8si)__B,
98 				(__v8si) __C, (__v8si) __D, (__mmask8)__A);
99 }
100 
/* __m128i form of the vpdpbusds builtin, without a mask.  Expands to
   __builtin_ia32_vpdpbusds_v4si (A, B, C) with each argument cast to
   __v4si and the result cast to __m128i.  Each argument is expanded
   exactly once.
   NOTE(review): the trailing "s" presumably selects the saturating
   variant of vpdpbusd -- confirm against Intel's documentation.  */
#define _mm_dpbusds_epi32(A, B, C)                                \
  ((__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) (A),         \
                                            (__v4si) (B),         \
                                            (__v4si) (C)))

106 extern __inline __m128i
107 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpbusds_epi32(__m128i __A,__mmask8 __B,__m128i __C,__m128i __D)108 _mm_mask_dpbusds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
109 {
110   return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask ((__v4si)__A,
111 				(__v4si) __C, (__v4si) __D, (__mmask8)__B);
112 }
113 
114 extern __inline __m128i
115 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpbusds_epi32(__mmask8 __A,__m128i __B,__m128i __C,__m128i __D)116 _mm_maskz_dpbusds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
117 {
118   return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz ((__v4si)__B,
119 				(__v4si) __C, (__v4si) __D, (__mmask8)__A);
120 }
121 
/* __m256i form of the vpdpwssd builtin, without a mask.  Expands to
   __builtin_ia32_vpdpwssd_v8si (A, B, C) with each argument cast to
   __v8si and the result cast to __m256i.  Each argument is expanded
   exactly once.
   NOTE(review): element-level semantics come from the builtin and are
   not visible here -- see Intel's VPDPWSSD documentation.  */
#define _mm256_dpwssd_epi32(A, B, C)                              \
  ((__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) (A),          \
                                           (__v8si) (B),          \
                                           (__v8si) (C)))

127 extern __inline __m256i
128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpwssd_epi32(__m256i __A,__mmask8 __B,__m256i __C,__m256i __D)129 _mm256_mask_dpwssd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
130 {
131   return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask ((__v8si)__A, (__v8si) __C,
132 						(__v8si) __D, (__mmask8)__B);
133 }
134 
135 extern __inline __m256i
136 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpwssd_epi32(__mmask8 __A,__m256i __B,__m256i __C,__m256i __D)137 _mm256_maskz_dpwssd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
138 {
139   return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz ((__v8si)__B,
140 				(__v8si) __C, (__v8si) __D, (__mmask8)__A);
141 }
142 
/* __m128i form of the vpdpwssd builtin, without a mask.  Expands to
   __builtin_ia32_vpdpwssd_v4si (A, B, C) with each argument cast to
   __v4si and the result cast to __m128i.  Each argument is expanded
   exactly once.
   NOTE(review): element-level semantics come from the builtin and are
   not visible here -- see Intel's VPDPWSSD documentation.  */
#define _mm_dpwssd_epi32(A, B, C)                                 \
  ((__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) (A),          \
                                           (__v4si) (B),          \
                                           (__v4si) (C)))

148 extern __inline __m128i
149 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpwssd_epi32(__m128i __A,__mmask8 __B,__m128i __C,__m128i __D)150 _mm_mask_dpwssd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
151 {
152   return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask ((__v4si)__A, (__v4si) __C,
153 						(__v4si) __D, (__mmask8)__B);
154 }
155 
156 extern __inline __m128i
157 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpwssd_epi32(__mmask8 __A,__m128i __B,__m128i __C,__m128i __D)158 _mm_maskz_dpwssd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
159 {
160   return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz ((__v4si)__B,
161 				(__v4si) __C, (__v4si) __D, (__mmask8)__A);
162 }
163 
/* __m256i form of the vpdpwssds builtin, without a mask.  Expands to
   __builtin_ia32_vpdpwssds_v8si (A, B, C) with each argument cast to
   __v8si and the result cast to __m256i.  Each argument is expanded
   exactly once.
   NOTE(review): the trailing "s" presumably selects the saturating
   variant of vpdpwssd -- confirm against Intel's documentation.  */
#define _mm256_dpwssds_epi32(A, B, C)                             \
  ((__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) (A),         \
                                            (__v8si) (B),         \
                                            (__v8si) (C)))

169 extern __inline __m256i
170 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpwssds_epi32(__m256i __A,__mmask8 __B,__m256i __C,__m256i __D)171 _mm256_mask_dpwssds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
172 {
173   return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask ((__v8si)__A,
174 				(__v8si) __C, (__v8si) __D, (__mmask8)__B);
175 }
176 
177 extern __inline __m256i
178 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpwssds_epi32(__mmask8 __A,__m256i __B,__m256i __C,__m256i __D)179 _mm256_maskz_dpwssds_epi32 (__mmask8 __A, __m256i __B, __m256i __C,
180 							__m256i __D)
181 {
182   return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz ((__v8si)__B,
183 				(__v8si) __C, (__v8si) __D, (__mmask8)__A);
184 }
185 
/* __m128i form of the vpdpwssds builtin, without a mask.  Expands to
   __builtin_ia32_vpdpwssds_v4si (A, B, C) with each argument cast to
   __v4si and the result cast to __m128i.  Each argument is expanded
   exactly once.
   NOTE(review): the trailing "s" presumably selects the saturating
   variant of vpdpwssd -- confirm against Intel's documentation.  */
#define _mm_dpwssds_epi32(A, B, C)                                \
  ((__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) (A),         \
                                            (__v4si) (B),         \
                                            (__v4si) (C)))

191 extern __inline __m128i
192 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpwssds_epi32(__m128i __A,__mmask8 __B,__m128i __C,__m128i __D)193 _mm_mask_dpwssds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
194 {
195   return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask ((__v4si)__A,
196 				(__v4si) __C, (__v4si) __D, (__mmask8)__B);
197 }
198 
199 extern __inline __m128i
200 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpwssds_epi32(__mmask8 __A,__m128i __B,__m128i __C,__m128i __D)201 _mm_maskz_dpwssds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
202 {
203   return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz ((__v4si)__B,
204 				(__v4si) __C, (__v4si) __D, (__mmask8)__A);
205 }
206 #ifdef __DISABLE_AVX512VNNIVL__
207 #undef __DISABLE_AVX512VNNIVL__
208 #pragma GCC pop_options
209 #endif /* __DISABLE_AVX512VNNIVL__ */
210 #endif /* __DISABLE_AVX512VNNIVL__ */
211