xref: /reactos/sdk/include/vcruntime/intrin.h (revision cc3672cb)
1 
2 #pragma once
3 #define __INTRIN_H_
4 
5 #ifndef RC_INVOKED
6 
7 #include <vcruntime.h>
8 #include <setjmp.h>
9 #include <stddef.h>
10 
11 #if defined(_M_IX86) || defined(_M_X64)
12 #include <immintrin.h>
13 //#include <ammintrin.h>
14 #include <xmmintrin.h> // native headers: immintrin.h -> wmmintrin.h -> nmmintrin.h -> smmintrin.h -> tmmintrin.h -> pmmintrin.h -> emmintrin.h
15 #endif /* _M_IX86 || _M_X64 */
16 
17 #if defined(_M_IX86)
18 //#include <mm3dnow.h>
19 #endif /* _M_IX86 */
20 
21 #if defined(_M_ARM)
22 //#include <arm_neon.h>
23 #include <armintr.h>
24 #endif /* _M_ARM */
25 
26 #if defined(__cplusplus)
27 extern "C" {
28 #endif
29 
/*
 * Intrinsic prototypes that are not guarded by any architecture macro in
 * this header (they follow the include section and precede the first
 * x86/x64-only list).
 *
 * _Interlocked_operand_, _Check_return_ and _In_ are annotation macros that
 * this file uses but does not define.
 *
 * NOTE(review): these presumably have to match the compiler's built-in
 * signatures token for token (including which ones carry __cdecl) -- confirm
 * against the toolchain before editing any prototype.
 */
30 void* _AddressOfReturnAddress(void);
31 unsigned char _BitScanForward(unsigned long * _Index, unsigned long _Mask);
32 unsigned char _BitScanReverse(unsigned long * _Index, unsigned long _Mask);
/*
 * _Interlocked* family. Every operand pointer is volatile-qualified and
 * annotated with _Interlocked_operand_. The 8/16/64 suffixes correspond to
 * char/short/__int64 operands; unsuffixed names use long, and the *Pointer
 * forms use void *. Only the unsuffixed CompareExchange, Decrement,
 * Exchange, ExchangeAdd and Increment prototypes are spelled with __cdecl.
 */
33 long _InterlockedAnd(_Interlocked_operand_ long volatile * _Value, long _Mask);
34 short _InterlockedAnd16(_Interlocked_operand_ short volatile * _Value, short _Mask);
35 char _InterlockedAnd8(_Interlocked_operand_ char volatile * _Value, char _Mask);
36 long __cdecl _InterlockedCompareExchange(_Interlocked_operand_ long volatile * _Destination, long _Exchange, long _Comparand);
37 short _InterlockedCompareExchange16(_Interlocked_operand_ short volatile * _Destination, short _Exchange, short _Comparand);
38 __int64 _InterlockedCompareExchange64(_Interlocked_operand_ __int64 volatile * _Destination, __int64 _Exchange, __int64 _Comparand);
39 char _InterlockedCompareExchange8(_Interlocked_operand_ char volatile * _Destination, char _Exchange, char _Comparand);
40 void * _InterlockedCompareExchangePointer(_Interlocked_operand_ void * volatile * _Destination, void * _Exchange, void * _Comparand);
41 long __cdecl _InterlockedDecrement(_Interlocked_operand_ long volatile * _Addend);
42 short _InterlockedDecrement16(_Interlocked_operand_ short volatile * _Addend);
43 long __cdecl _InterlockedExchange(_Interlocked_operand_ long volatile * _Target, long _Value);
44 short _InterlockedExchange16(_Interlocked_operand_ short volatile * _Target, short _Value);
45 char _InterlockedExchange8(_Interlocked_operand_ char volatile * _Target, char _Value);
46 long __cdecl _InterlockedExchangeAdd(_Interlocked_operand_ long volatile * _Addend, long _Value);
47 short _InterlockedExchangeAdd16(_Interlocked_operand_ short volatile * _Addend, short _Value);
48 char _InterlockedExchangeAdd8(_Interlocked_operand_ char volatile * _Addend, char _Value);
49 void * _InterlockedExchangePointer(_Interlocked_operand_ void * volatile * _Target, void * _Value);
50 long __cdecl _InterlockedIncrement(_Interlocked_operand_ long volatile * _Addend);
51 short _InterlockedIncrement16(_Interlocked_operand_ short volatile * _Addend);
52 long _InterlockedOr(_Interlocked_operand_ long volatile * _Value, long _Mask);
53 short _InterlockedOr16(_Interlocked_operand_ short volatile * _Value, short _Mask);
54 char _InterlockedOr8(_Interlocked_operand_ char volatile * _Value, char _Mask);
55 long _InterlockedXor(_Interlocked_operand_ long volatile * _Value, long _Mask);
56 short _InterlockedXor16(_Interlocked_operand_ short volatile * _Value, short _Mask);
57 char _InterlockedXor8(_Interlocked_operand_ char volatile * _Value, char _Mask);
/*
 * Argument-less helpers and miscellaneous prototypes. __fastfail is the
 * only one declared __declspec(noreturn).
 */
58 void _ReadBarrier(void);
59 void _ReadWriteBarrier(void);
60 void * _ReturnAddress(void);
61 void _WriteBarrier(void);
62 void __code_seg(const char *);
63 void __cdecl __debugbreak(void);
64 __declspec(noreturn) void __fastfail(unsigned int);
65 void __nop(void);
66 __int64 __cdecl _abs64(__int64);
/*
 * Bit-test prototypes: only _bittest takes a pointer to const; the
 * _interlockedbittestand* forms (further down) take a volatile pointer
 * instead.
 */
67 unsigned char _bittest(long const *, long);
68 unsigned char _bittestandcomplement(long *, long);
69 unsigned char _bittestandreset(long *, long);
70 unsigned char _bittestandset(long *, long);
/*
 * Byte-swap and rotate prototypes: all are __cdecl and annotated with
 * _Check_return_ / _In_. The 8- and 16-bit rotates take the shift count as
 * unsigned char; the remaining rotates take it as int.
 */
71 _Check_return_ unsigned __int64 __cdecl _byteswap_uint64(_In_ unsigned __int64);
72 _Check_return_ unsigned long __cdecl _byteswap_ulong(_In_ unsigned long);
73 _Check_return_ unsigned short __cdecl _byteswap_ushort(_In_ unsigned short);
74 void __cdecl _disable(void);
75 void __cdecl _enable(void);
76 unsigned char _interlockedbittestandreset(long volatile *, long);
77 unsigned char _interlockedbittestandset(long volatile *, long);
78 _Check_return_ unsigned long __cdecl _lrotl(_In_ unsigned long, _In_ int);
79 _Check_return_ unsigned long __cdecl _lrotr(_In_ unsigned long, _In_ int);
80 _Check_return_ unsigned int __cdecl _rotl(_In_ unsigned int _Value, _In_ int _Shift);
81 _Check_return_ unsigned short __cdecl _rotl16(_In_ unsigned short _Value, _In_ unsigned char _Shift);
82 _Check_return_ unsigned __int64 __cdecl _rotl64(_In_ unsigned __int64 _Value, _In_ int _Shift);
83 _Check_return_ unsigned char __cdecl _rotl8(_In_ unsigned char _Value, _In_ unsigned char _Shift);
84 _Check_return_ unsigned int __cdecl _rotr(_In_ unsigned int _Value, _In_ int _Shift);
85 _Check_return_ unsigned short __cdecl _rotr16(_In_ unsigned short _Value, _In_ unsigned char _Shift);
86 _Check_return_ unsigned __int64 __cdecl _rotr64(_In_ unsigned __int64 _Value, _In_ int _Shift);
87 _Check_return_ unsigned char __cdecl _rotr8(_In_ unsigned char _Value, _In_ unsigned char _Shift);
88 
89 #if defined(_M_IX86) || defined(_M_X64)
90 
/*
 * Prototypes shared by x86 and x64; this list sits inside the
 * `#if defined(_M_IX86) || defined(_M_X64)` conditional.
 *
 * Parameters are unnamed throughout, so argument roles cannot be read off
 * this file. What the types do show:
 *   - __cpuid/__cpuidex take a 4-element int array as their first parameter;
 *   - __movsb/__movsd/__movsw take a const-qualified second pointer and a
 *     size_t count, and __stosb/__stosd/__stosw also take a size_t count;
 *   - the __in*string / __out*string prototypes take an unsigned long count,
 *     and the __out*string buffers are declared non-const;
 *   - only _inp, _inpd and _inpw in this list carry an explicit __cdecl.
 *
 * NOTE(review): presumably these must agree exactly with the compiler's
 * built-in declarations -- confirm before changing any type or qualifier.
 */
91 void __cpuid(int[4], int);
92 void __cpuidex(int[4], int, int);
93 __int64 __emul(int, int);
94 unsigned __int64 __emulu(unsigned int, unsigned int);
95 unsigned int __getcallerseflags(void);
96 void __halt(void);
97 unsigned char __inbyte(unsigned short);
98 void __inbytestring(unsigned short, unsigned char *, unsigned long);
99 unsigned long __indword(unsigned short);
100 void __indwordstring(unsigned short, unsigned long *, unsigned long);
101 void __int2c(void);
102 void __invlpg(void *);
103 unsigned short __inword(unsigned short);
104 void __inwordstring(unsigned short, unsigned short *, unsigned long);
105 void __lidt(void *);
106 unsigned __int64 __ll_lshift(unsigned __int64, int);
107 __int64 __ll_rshift(__int64, int);
108 unsigned int __lzcnt(unsigned int);
109 unsigned short __lzcnt16(unsigned short);
110 void __movsb(unsigned char *, unsigned char const *, size_t);
111 void __movsd(unsigned long *, unsigned long const *, size_t);
112 void __movsw(unsigned short *, unsigned short const *, size_t);
113 void __nvreg_restore_fence(void);
114 void __nvreg_save_fence(void);
115 void __outbyte(unsigned short, unsigned char);
116 void __outbytestring(unsigned short, unsigned char *, unsigned long);
117 void __outdword(unsigned short, unsigned long);
118 void __outdwordstring(unsigned short, unsigned long *, unsigned long);
119 void __outword(unsigned short, unsigned short);
120 void __outwordstring(unsigned short, unsigned short *, unsigned long);
121 unsigned int __popcnt(unsigned int);
122 unsigned short __popcnt16(unsigned short);
123 unsigned __int64 __rdtsc(void);
124 unsigned __int64 __rdtscp(unsigned int *);
125 unsigned __int64 __readmsr(unsigned long);
126 unsigned __int64 __readpmc(unsigned long);
127 unsigned long __segmentlimit(unsigned long);
128 void __sidt(void *);
/* Note the single leading underscore on _sgdt, unlike __sidt/__lidt above. */
129 void _sgdt(void *);
130 void __stosb(unsigned char *, unsigned char, size_t);
131 void __stosd(unsigned long *, unsigned long, size_t);
132 void __stosw(unsigned short *, unsigned short, size_t);
133 void __svm_clgi(void);
134 void __svm_invlpga(void *, int);
135 void __svm_skinit(int);
136 void __svm_stgi(void);
137 void __svm_vmload(size_t);
138 void __svm_vmrun(size_t);
139 void __svm_vmsave(size_t);
140 void __ud2(void);
141 unsigned __int64 __ull_rshift(unsigned __int64, int);
142 void __vmx_off(void);
143 void __vmx_vmptrst(unsigned __int64 *);
144 void __wbinvd(void);
145 void __writemsr(unsigned long, unsigned __int64);
146 int __cdecl _inp(unsigned short);
147 unsigned long __cdecl _inpd(unsigned short);
148 unsigned short __cdecl _inpw(unsigned short);
149 void _m_prefetch(void *);
150 void _m_prefetchw(volatile const void *);
151 
/*
 * Fence and pause prototypes. Unlike the other _mm_* names in this header,
 * these four are declared outside the `#if 0` list that follows, so they
 * are visible whenever the x86/x64 section is compiled.
 */
152 void _mm_mfence(void);
153 void _mm_lfence(void);
154 void _mm_sfence(void);
155 void _mm_pause(void);
156 
/*
 * Disabled list of _mm_* prototypes using the __m64/__m128/__m128d/__m128i
 * vector types. Everything up to the matching `#endif // 0` is dropped by
 * the preprocessor, so none of these declarations is visible to includers.
 *
 * NOTE(review): presumably kept as a reference and disabled because the
 * <immintrin.h>/<xmmintrin.h> headers included near the top of this file
 * already provide these declarations and the vector types -- confirm before
 * re-enabling or deleting.
 */
157 #if 0
158 __m128i _mm_abs_epi16(__m128i);
159 __m128i _mm_abs_epi32(__m128i);
160 __m128i _mm_abs_epi8(__m128i);
161 __m64 _mm_abs_pi16(__m64);
162 __m64 _mm_abs_pi32(__m64);
163 __m64 _mm_abs_pi8(__m64);
164 __m128i _mm_add_epi16(__m128i, __m128i);
165 __m128i _mm_add_epi32(__m128i, __m128i);
166 __m128i _mm_add_epi64(__m128i, __m128i);
167 __m128i _mm_add_epi8(__m128i, __m128i);
168 __m128d _mm_add_pd(__m128d, __m128d);
169 __m128 _mm_add_ps(__m128, __m128);
170 __m128d _mm_add_sd(__m128d, __m128d);
171 __m128 _mm_add_ss(__m128, __m128);
172 __m128i _mm_adds_epi16(__m128i, __m128i);
173 __m128i _mm_adds_epi8(__m128i, __m128i);
174 __m128i _mm_adds_epu16(__m128i, __m128i);
175 __m128i _mm_adds_epu8(__m128i, __m128i);
176 __m128d _mm_addsub_pd(__m128d, __m128d);
177 __m128 _mm_addsub_ps(__m128, __m128);
178 __m128i _mm_alignr_epi8(__m128i, __m128i, int);
179 __m64 _mm_alignr_pi8(__m64, __m64, int);
180 __m128d _mm_and_pd(__m128d, __m128d);
181 __m128 _mm_and_ps(__m128, __m128);
182 __m128i _mm_and_si128(__m128i, __m128i);
183 __m128d _mm_andnot_pd(__m128d, __m128d);
184 __m128 _mm_andnot_ps(__m128, __m128);
185 __m128i _mm_andnot_si128(__m128i, __m128i);
186 __m128i _mm_avg_epu16(__m128i, __m128i);
187 __m128i _mm_avg_epu8(__m128i, __m128i);
188 __m128i _mm_blend_epi16(__m128i, __m128i, int);
189 __m128d _mm_blend_pd(__m128d, __m128d, int);
190 __m128 _mm_blend_ps(__m128, __m128, int);
191 __m128i _mm_blendv_epi8(__m128i, __m128i, __m128i);
192 __m128d _mm_blendv_pd(__m128d, __m128d, __m128d);
193 __m128 _mm_blendv_ps(__m128, __m128, __m128);
194 void _mm_clflush(void const *);
195 __m128i _mm_cmpeq_epi16(__m128i, __m128i);
196 __m128i _mm_cmpeq_epi32(__m128i, __m128i);
197 __m128i _mm_cmpeq_epi64(__m128i, __m128i);
198 __m128i _mm_cmpeq_epi8(__m128i, __m128i);
199 __m128d _mm_cmpeq_pd(__m128d, __m128d);
200 __m128 _mm_cmpeq_ps(__m128, __m128);
201 __m128d _mm_cmpeq_sd(__m128d, __m128d);
202 __m128 _mm_cmpeq_ss(__m128, __m128);
203 int _mm_cmpestra(__m128i, int, __m128i, int, int);
204 int _mm_cmpestrc(__m128i, int, __m128i, int, int);
205 int _mm_cmpestri(__m128i, int, __m128i, int, int);
206 __m128i _mm_cmpestrm(__m128i, int, __m128i, int, int);
207 int _mm_cmpestro(__m128i, int, __m128i, int, int);
208 int _mm_cmpestrs(__m128i, int, __m128i, int, int);
209 int _mm_cmpestrz(__m128i, int, __m128i, int, int);
210 __m128d _mm_cmpge_pd(__m128d, __m128d);
211 __m128 _mm_cmpge_ps(__m128, __m128);
212 __m128d _mm_cmpge_sd(__m128d, __m128d);
213 __m128 _mm_cmpge_ss(__m128, __m128);
214 __m128i _mm_cmpgt_epi16(__m128i, __m128i);
215 __m128i _mm_cmpgt_epi32(__m128i, __m128i);
216 __m128i _mm_cmpgt_epi64(__m128i, __m128i);
217 __m128i _mm_cmpgt_epi8(__m128i, __m128i);
218 __m128d _mm_cmpgt_pd(__m128d, __m128d);
219 __m128 _mm_cmpgt_ps(__m128, __m128);
220 __m128d _mm_cmpgt_sd(__m128d, __m128d);
221 __m128 _mm_cmpgt_ss(__m128, __m128);
222 int _mm_cmpistra(__m128i, __m128i, int);
223 int _mm_cmpistrc(__m128i, __m128i, int);
224 int _mm_cmpistri(__m128i, __m128i, int);
225 __m128i _mm_cmpistrm(__m128i, __m128i, int);
226 int _mm_cmpistro(__m128i, __m128i, int);
227 int _mm_cmpistrs(__m128i, __m128i, int);
228 int _mm_cmpistrz(__m128i, __m128i, int);
229 __m128d _mm_cmple_pd(__m128d, __m128d);
230 __m128 _mm_cmple_ps(__m128, __m128);
231 __m128d _mm_cmple_sd(__m128d, __m128d);
232 __m128 _mm_cmple_ss(__m128, __m128);
233 __m128i _mm_cmplt_epi16(__m128i, __m128i);
234 __m128i _mm_cmplt_epi32(__m128i, __m128i);
235 __m128i _mm_cmplt_epi8(__m128i, __m128i);
236 __m128d _mm_cmplt_pd(__m128d, __m128d);
237 __m128 _mm_cmplt_ps(__m128, __m128);
238 __m128d _mm_cmplt_sd(__m128d, __m128d);
239 __m128 _mm_cmplt_ss(__m128, __m128);
240 __m128d _mm_cmpneq_pd(__m128d, __m128d);
241 __m128 _mm_cmpneq_ps(__m128, __m128);
242 __m128d _mm_cmpneq_sd(__m128d, __m128d);
243 __m128 _mm_cmpneq_ss(__m128, __m128);
244 __m128d _mm_cmpnge_pd(__m128d, __m128d);
245 __m128 _mm_cmpnge_ps(__m128, __m128);
246 __m128d _mm_cmpnge_sd(__m128d, __m128d);
247 __m128 _mm_cmpnge_ss(__m128, __m128);
248 __m128d _mm_cmpngt_pd(__m128d, __m128d);
249 __m128 _mm_cmpngt_ps(__m128, __m128);
250 __m128d _mm_cmpngt_sd(__m128d, __m128d);
251 __m128 _mm_cmpngt_ss(__m128, __m128);
252 __m128d _mm_cmpnle_pd(__m128d, __m128d);
253 __m128 _mm_cmpnle_ps(__m128, __m128);
254 __m128d _mm_cmpnle_sd(__m128d, __m128d);
255 __m128 _mm_cmpnle_ss(__m128, __m128);
256 __m128d _mm_cmpnlt_pd(__m128d, __m128d);
257 __m128 _mm_cmpnlt_ps(__m128, __m128);
258 __m128d _mm_cmpnlt_sd(__m128d, __m128d);
259 __m128 _mm_cmpnlt_ss(__m128, __m128);
260 __m128d _mm_cmpord_pd(__m128d, __m128d);
261 __m128 _mm_cmpord_ps(__m128, __m128);
262 __m128d _mm_cmpord_sd(__m128d, __m128d);
263 __m128 _mm_cmpord_ss(__m128, __m128);
264 __m128d _mm_cmpunord_pd(__m128d, __m128d);
265 __m128 _mm_cmpunord_ps(__m128, __m128);
266 __m128d _mm_cmpunord_sd(__m128d, __m128d);
267 __m128 _mm_cmpunord_ss(__m128, __m128);
268 int _mm_comieq_sd(__m128d, __m128d);
269 int _mm_comieq_ss(__m128, __m128);
270 int _mm_comige_sd(__m128d, __m128d);
271 int _mm_comige_ss(__m128, __m128);
272 int _mm_comigt_sd(__m128d, __m128d);
273 int _mm_comigt_ss(__m128, __m128);
274 int _mm_comile_sd(__m128d, __m128d);
275 int _mm_comile_ss(__m128, __m128);
276 int _mm_comilt_sd(__m128d, __m128d);
277 int _mm_comilt_ss(__m128, __m128);
278 int _mm_comineq_sd(__m128d, __m128d);
279 int _mm_comineq_ss(__m128, __m128);
280 unsigned int _mm_crc32_u16(unsigned int, unsigned short);
281 unsigned int _mm_crc32_u32(unsigned int, unsigned int);
282 unsigned int _mm_crc32_u8(unsigned int, unsigned char);
283 __m128 _mm_cvt_si2ss(__m128, int);
284 int _mm_cvt_ss2si(__m128);
285 __m128i _mm_cvtepi16_epi32(__m128i);
286 __m128i _mm_cvtepi16_epi64(__m128i);
287 __m128i _mm_cvtepi32_epi64(__m128i);
288 __m128d _mm_cvtepi32_pd(__m128i);
289 __m128 _mm_cvtepi32_ps(__m128i);
290 __m128i _mm_cvtepi8_epi16(__m128i);
291 __m128i _mm_cvtepi8_epi32(__m128i);
292 __m128i _mm_cvtepi8_epi64(__m128i);
293 __m128i _mm_cvtepu16_epi32(__m128i);
294 __m128i _mm_cvtepu16_epi64(__m128i);
295 __m128i _mm_cvtepu32_epi64(__m128i);
296 __m128i _mm_cvtepu8_epi16(__m128i);
297 __m128i _mm_cvtepu8_epi32(__m128i);
298 __m128i _mm_cvtepu8_epi64(__m128i);
299 __m128i _mm_cvtpd_epi32(__m128d);
300 __m128 _mm_cvtpd_ps(__m128d);
301 __m128i _mm_cvtps_epi32(__m128);
302 __m128d _mm_cvtps_pd(__m128);
303 int _mm_cvtsd_si32(__m128d);
304 __m128 _mm_cvtsd_ss(__m128, __m128d);
305 int _mm_cvtsi128_si32(__m128i);
306 __m128d _mm_cvtsi32_sd(__m128d, int);
307 __m128i _mm_cvtsi32_si128(int);
308 __m128d _mm_cvtss_sd(__m128d, __m128);
309 int _mm_cvtt_ss2si(__m128);
310 __m128i _mm_cvttpd_epi32(__m128d);
311 __m128i _mm_cvttps_epi32(__m128);
312 int _mm_cvttsd_si32(__m128d);
313 __m128d _mm_div_pd(__m128d, __m128d);
314 __m128 _mm_div_ps(__m128, __m128);
315 __m128d _mm_div_sd(__m128d, __m128d);
316 __m128 _mm_div_ss(__m128, __m128);
317 __m128d _mm_dp_pd(__m128d, __m128d, int);
318 __m128 _mm_dp_ps(__m128, __m128, int);
319 int _mm_extract_epi16(__m128i, int);
320 int _mm_extract_epi32(__m128i, int);
321 int _mm_extract_epi8(__m128i, int);
322 int _mm_extract_ps(__m128, int);
323 __m128i _mm_extract_si64(__m128i, __m128i);
324 __m128i _mm_extracti_si64(__m128i, int, int);
325 unsigned int _mm_getcsr(void);
326 __m128i _mm_hadd_epi16(__m128i, __m128i);
327 __m128i _mm_hadd_epi32(__m128i, __m128i);
328 __m128d _mm_hadd_pd(__m128d, __m128d);
329 __m64 _mm_hadd_pi16(__m64, __m64);
330 __m64 _mm_hadd_pi32(__m64, __m64);
331 __m128 _mm_hadd_ps(__m128, __m128);
332 __m128i _mm_hadds_epi16(__m128i, __m128i);
333 __m64 _mm_hadds_pi16(__m64, __m64);
334 __m128i _mm_hsub_epi16(__m128i, __m128i);
335 __m128i _mm_hsub_epi32(__m128i, __m128i);
336 __m128d _mm_hsub_pd(__m128d, __m128d);
337 __m64 _mm_hsub_pi16(__m64, __m64);
338 __m64 _mm_hsub_pi32(__m64, __m64);
339 __m128 _mm_hsub_ps(__m128, __m128);
340 __m128i _mm_hsubs_epi16(__m128i, __m128i);
341 __m64 _mm_hsubs_pi16(__m64, __m64);
342 __m128i _mm_insert_epi16(__m128i, int, int);
343 __m128i _mm_insert_epi32(__m128i, int, int);
344 __m128i _mm_insert_epi8(__m128i, int, int);
345 __m128 _mm_insert_ps(__m128, __m128, int);
346 __m128i _mm_insert_si64(__m128i, __m128i);
347 __m128i _mm_inserti_si64(__m128i, __m128i, int, int);
348 __m128i _mm_lddqu_si128(__m128i const *);
349 __m128d _mm_load1_pd(double const *);
350 __m128d _mm_load_pd(double const *);
351 __m128 _mm_load_ps(float const *);
352 __m128 _mm_load_ps1(float const *);
353 __m128d _mm_load_sd(double const *);
354 __m128i _mm_load_si128(__m128i const *);
355 __m128 _mm_load_ss(float const *);
356 __m128d _mm_loaddup_pd(double const *);
357 __m128d _mm_loadh_pd(__m128d, double const *);
358 __m128 _mm_loadh_pi(__m128, __m64 const *);
359 __m128i _mm_loadl_epi64(__m128i const *);
360 __m128d _mm_loadl_pd(__m128d, double const *);
361 __m128 _mm_loadl_pi(__m128, __m64 const *);
362 __m128d _mm_loadr_pd(double const *);
363 __m128 _mm_loadr_ps(float const *);
364 __m128d _mm_loadu_pd(double const *);
365 __m128 _mm_loadu_ps(float const *);
366 __m128i _mm_loadu_si128(__m128i const *);
367 __m128i _mm_madd_epi16(__m128i, __m128i);
368 __m128i _mm_maddubs_epi16(__m128i, __m128i);
369 __m64 _mm_maddubs_pi16(__m64, __m64);
370 void _mm_maskmoveu_si128(__m128i, __m128i, _Out_writes_bytes_(16) char *);
371 __m128i _mm_max_epi16(__m128i, __m128i);
372 __m128i _mm_max_epi32(__m128i, __m128i);
373 __m128i _mm_max_epi8(__m128i, __m128i);
374 __m128i _mm_max_epu16(__m128i, __m128i);
375 __m128i _mm_max_epu32(__m128i, __m128i);
376 __m128i _mm_max_epu8(__m128i, __m128i);
377 __m128d _mm_max_pd(__m128d, __m128d);
378 __m128 _mm_max_ps(__m128, __m128);
379 __m128d _mm_max_sd(__m128d, __m128d);
380 __m128 _mm_max_ss(__m128, __m128);
381 __m128i _mm_min_epi16(__m128i, __m128i);
382 __m128i _mm_min_epi32(__m128i, __m128i);
383 __m128i _mm_min_epi8(__m128i, __m128i);
384 __m128i _mm_min_epu16(__m128i, __m128i);
385 __m128i _mm_min_epu32(__m128i, __m128i);
386 __m128i _mm_min_epu8(__m128i, __m128i);
387 __m128d _mm_min_pd(__m128d, __m128d);
388 __m128 _mm_min_ps(__m128, __m128);
389 __m128d _mm_min_sd(__m128d, __m128d);
390 __m128 _mm_min_ss(__m128, __m128);
391 __m128i _mm_minpos_epu16(__m128i);
392 void _mm_monitor(void const *, unsigned int, unsigned int);
393 __m128i _mm_move_epi64(__m128i);
394 __m128d _mm_move_sd(__m128d, __m128d);
395 __m128 _mm_move_ss(__m128, __m128);
396 __m128d _mm_movedup_pd(__m128d);
397 __m128 _mm_movehdup_ps(__m128);
398 __m128 _mm_movehl_ps(__m128, __m128);
399 __m128 _mm_moveldup_ps(__m128);
400 __m128 _mm_movelh_ps(__m128, __m128);
401 int _mm_movemask_epi8(__m128i);
402 int _mm_movemask_pd(__m128d);
403 int _mm_movemask_ps(__m128);
404 __m64 _mm_movepi64_pi64(__m128i);
405 __m128i _mm_movpi64_epi64(__m64);
406 __m128i _mm_mpsadbw_epu8(__m128i, __m128i, int);
407 __m128i _mm_mul_epi32(__m128i, __m128i);
408 __m128i _mm_mul_epu32(__m128i, __m128i);
409 __m128d _mm_mul_pd(__m128d, __m128d);
410 __m128 _mm_mul_ps(__m128, __m128);
411 __m128d _mm_mul_sd(__m128d, __m128d);
412 __m128 _mm_mul_ss(__m128, __m128);
413 __m128i _mm_mulhi_epi16(__m128i, __m128i);
414 __m128i _mm_mulhi_epu16(__m128i, __m128i);
415 __m128i _mm_mulhrs_epi16(__m128i, __m128i);
416 __m64 _mm_mulhrs_pi16(__m64, __m64);
417 __m128i _mm_mullo_epi16(__m128i, __m128i);
418 __m128i _mm_mullo_epi32(__m128i, __m128i);
419 void _mm_mwait(unsigned int, unsigned int);
420 __m128d _mm_or_pd(__m128d, __m128d);
421 __m128 _mm_or_ps(__m128, __m128);
422 __m128i _mm_or_si128(__m128i, __m128i);
423 __m128i _mm_packs_epi16(__m128i, __m128i);
424 __m128i _mm_packs_epi32(__m128i, __m128i);
425 __m128i _mm_packus_epi16(__m128i, __m128i);
426 __m128i _mm_packus_epi32(__m128i, __m128i);
427 int _mm_popcnt_u32(unsigned int);
428 void _mm_prefetch(_In_ char const *, _In_ int);
429 __m128 _mm_rcp_ps(__m128);
430 __m128 _mm_rcp_ss(__m128);
431 __m128d _mm_round_pd(__m128d, int);
432 __m128 _mm_round_ps(__m128, int);
433 __m128d _mm_round_sd(__m128d, __m128d, int);
434 __m128 _mm_round_ss(__m128, __m128, int);
435 __m128 _mm_rsqrt_ps(__m128);
436 __m128 _mm_rsqrt_ss(__m128);
437 __m128i _mm_sad_epu8(__m128i, __m128i);
438 __m128i _mm_set1_epi16(short);
439 __m128i _mm_set1_epi32(int);
440 __m128i _mm_set1_epi8(char);
441 __m128d _mm_set1_pd(double);
442 __m128i _mm_set_epi16(short, short, short, short, short, short, short, short);
443 __m128i _mm_set_epi32(int, int, int, int);
444 __m128i _mm_set_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char);
445 __m128d _mm_set_pd(double, double);
446 __m128 _mm_set_ps(float, float, float, float);
447 __m128 _mm_set_ps1(float);
448 __m128d _mm_set_sd(double);
449 __m128 _mm_set_ss(float);
450 void _mm_setcsr(unsigned int);
451 __m128i _mm_setl_epi64(__m128i);
452 __m128i _mm_setr_epi16(short, short, short, short, short, short, short, short);
453 __m128i _mm_setr_epi32(int, int, int, int);
454 __m128i _mm_setr_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char);
455 __m128d _mm_setr_pd(double, double);
456 __m128 _mm_setr_ps(float, float, float, float);
457 __m128d _mm_setzero_pd(void);
458 __m128 _mm_setzero_ps(void);
459 __m128i _mm_setzero_si128(void);
460 __m128i _mm_shuffle_epi32(__m128i, int);
461 __m128i _mm_shuffle_epi8(__m128i, __m128i);
462 __m128d _mm_shuffle_pd(__m128d, __m128d, int);
463 __m64 _mm_shuffle_pi8(__m64, __m64);
464 __m128 _mm_shuffle_ps(__m128, __m128, unsigned int);
465 __m128i _mm_shufflehi_epi16(__m128i, int);
466 __m128i _mm_shufflelo_epi16(__m128i, int);
467 __m128i _mm_sign_epi16(__m128i, __m128i);
468 __m128i _mm_sign_epi32(__m128i, __m128i);
469 __m128i _mm_sign_epi8(__m128i, __m128i);
470 __m64 _mm_sign_pi16(__m64, __m64);
471 __m64 _mm_sign_pi32(__m64, __m64);
472 __m64 _mm_sign_pi8(__m64, __m64);
473 __m128i _mm_sll_epi16(__m128i, __m128i);
474 __m128i _mm_sll_epi32(__m128i, __m128i);
475 __m128i _mm_sll_epi64(__m128i, __m128i);
476 __m128i _mm_slli_epi16(__m128i, int);
477 __m128i _mm_slli_epi32(__m128i, int);
478 __m128i _mm_slli_epi64(__m128i, int);
479 __m128i _mm_slli_si128(__m128i, int);
480 __m128d _mm_sqrt_pd(__m128d);
481 __m128 _mm_sqrt_ps(__m128);
482 __m128d _mm_sqrt_sd(__m128d, __m128d);
483 __m128 _mm_sqrt_ss(__m128);
484 __m128i _mm_sra_epi16(__m128i, __m128i);
485 __m128i _mm_sra_epi32(__m128i, __m128i);
486 __m128i _mm_srai_epi16(__m128i, int);
487 __m128i _mm_srai_epi32(__m128i, int);
488 __m128i _mm_srl_epi16(__m128i, __m128i);
489 __m128i _mm_srl_epi32(__m128i, __m128i);
490 __m128i _mm_srl_epi64(__m128i, __m128i);
491 __m128i _mm_srli_epi16(__m128i, int);
492 __m128i _mm_srli_epi32(__m128i, int);
493 __m128i _mm_srli_epi64(__m128i, int);
494 __m128i _mm_srli_si128(__m128i, int);
495 void _mm_store1_pd(double *, __m128d);
496 void _mm_store_pd(double *, __m128d);
497 void _mm_store_ps(float *, __m128);
498 void _mm_store_ps1(float *, __m128);
499 void _mm_store_sd(double *, __m128d);
500 void _mm_store_si128(__m128i *, __m128i);
501 void _mm_store_ss(float *, __m128);
502 void _mm_storeh_pd(double *, __m128d);
503 void _mm_storeh_pi(__m64 *, __m128);
504 void _mm_storel_epi64(__m128i *, __m128i);
505 void _mm_storel_pd(double *, __m128d);
506 void _mm_storel_pi(__m64 *, __m128);
507 void _mm_storer_pd(double *, __m128d);
508 void _mm_storer_ps(float *, __m128);
509 void _mm_storeu_pd(double *, __m128d);
510 void _mm_storeu_ps(float *, __m128);
511 void _mm_storeu_si128(__m128i *, __m128i);
512 __m128i _mm_stream_load_si128(__m128i *);
513 void _mm_stream_pd(double *, __m128d);
514 void _mm_stream_ps(float *, __m128);
515 void _mm_stream_sd(double *, __m128d);
516 void _mm_stream_si128(__m128i *, __m128i);
517 void _mm_stream_si32(int *, int);
518 void _mm_stream_ss(float *, __m128);
519 __m128i _mm_sub_epi16(__m128i, __m128i);
520 __m128i _mm_sub_epi32(__m128i, __m128i);
521 __m128i _mm_sub_epi64(__m128i, __m128i);
522 __m128i _mm_sub_epi8(__m128i, __m128i);
523 __m128d _mm_sub_pd(__m128d, __m128d);
524 __m128 _mm_sub_ps(__m128, __m128);
525 __m128d _mm_sub_sd(__m128d, __m128d);
526 __m128 _mm_sub_ss(__m128, __m128);
527 __m128i _mm_subs_epi16(__m128i, __m128i);
528 __m128i _mm_subs_epi8(__m128i, __m128i);
529 __m128i _mm_subs_epu16(__m128i, __m128i);
530 __m128i _mm_subs_epu8(__m128i, __m128i);
531 int _mm_testc_si128(__m128i, __m128i);
532 int _mm_testnzc_si128(__m128i, __m128i);
533 int _mm_testz_si128(__m128i, __m128i);
534 int _mm_ucomieq_sd(__m128d, __m128d);
535 int _mm_ucomieq_ss(__m128, __m128);
536 int _mm_ucomige_sd(__m128d, __m128d);
537 int _mm_ucomige_ss(__m128, __m128);
538 int _mm_ucomigt_sd(__m128d, __m128d);
539 int _mm_ucomigt_ss(__m128, __m128);
540 int _mm_ucomile_sd(__m128d, __m128d);
541 int _mm_ucomile_ss(__m128, __m128);
542 int _mm_ucomilt_sd(__m128d, __m128d);
543 int _mm_ucomilt_ss(__m128, __m128);
544 int _mm_ucomineq_sd(__m128d, __m128d);
545 int _mm_ucomineq_ss(__m128, __m128);
546 __m128i _mm_unpackhi_epi16(__m128i, __m128i);
547 __m128i _mm_unpackhi_epi32(__m128i, __m128i);
548 __m128i _mm_unpackhi_epi64(__m128i, __m128i);
549 __m128i _mm_unpackhi_epi8(__m128i, __m128i);
550 __m128d _mm_unpackhi_pd(__m128d, __m128d);
551 __m128 _mm_unpackhi_ps(__m128, __m128);
552 __m128i _mm_unpacklo_epi16(__m128i, __m128i);
553 __m128i _mm_unpacklo_epi32(__m128i, __m128i);
554 __m128i _mm_unpacklo_epi64(__m128i, __m128i);
555 __m128i _mm_unpacklo_epi8(__m128i, __m128i);
556 __m128d _mm_unpacklo_pd(__m128d, __m128d);
557 __m128 _mm_unpacklo_ps(__m128, __m128);
558 __m128d _mm_xor_pd(__m128d, __m128d);
559 __m128 _mm_xor_ps(__m128, __m128);
560 __m128i _mm_xor_si128(__m128i, __m128i);
561 #endif // 0
562 
/*
 * Port I/O prototypes, all explicitly __cdecl and taking the port as
 * unsigned short. Each function is declared under two spellings with the
 * same signature: underscore-prefixed (_outp/_outpd/_outpw) and plain
 * (inp/inpd/inpw, outp/outpd/outpw). The underscore-prefixed input
 * counterparts (_inp, _inpd, _inpw) are declared earlier in this x86/x64
 * section. The byte-wide forms use int for the value; the word and dword
 * forms use unsigned short and unsigned long respectively.
 */
563 int __cdecl _outp(unsigned short, int);
564 unsigned long __cdecl _outpd(unsigned short, unsigned long);
565 unsigned short __cdecl _outpw(unsigned short, unsigned short);
566 int __cdecl inp(unsigned short);
567 unsigned long __cdecl inpd(unsigned short);
568 unsigned short __cdecl inpw(unsigned short);
569 int __cdecl outp(unsigned short, int);
570 unsigned long __cdecl outpd(unsigned short, unsigned long);
571 unsigned short __cdecl outpw(unsigned short, unsigned short);
572 
573 #endif /* _M_IX86 || _M_X64 */
574 
575 #if defined(_M_IX86)
576 
/*
 * Prototypes declared only when _M_IX86 is defined.
 *
 * The __*fs* accessors all take an unsigned long as their first parameter;
 * the byte/word/dword suffix matches the unsigned char / unsigned short /
 * unsigned long value type.
 *
 * NOTE(review): the control-register readers return unsigned long while
 * the matching writers take unsigned int, and there is a __readcr2 but no
 * __writecr2. Presumably this mirrors the compiler's built-in prototypes --
 * confirm before "unifying" the types.
 */
577 long _InterlockedAddLargeStatistic(_Interlocked_operand_ __int64 volatile * _Addend, long _Value);
578 void __addfsbyte(unsigned long, unsigned char);
579 void __addfsdword(unsigned long, unsigned long);
580 void __addfsword(unsigned long, unsigned short);
581 void __incfsbyte(unsigned long);
582 void __incfsdword(unsigned long);
583 void __incfsword(unsigned long);
584 unsigned long __readcr0(void);
585 unsigned long __readcr2(void);
586 unsigned long __readcr3(void);
587 unsigned long __readcr4(void);
588 unsigned long __readcr8(void);
589 unsigned int __readdr(unsigned int);
590 unsigned int __readeflags(void);
591 unsigned char __readfsbyte(unsigned long);
592 unsigned long __readfsdword(unsigned long);
593 unsigned short __readfsword(unsigned long);
594 void __writecr0(unsigned int);
595 void __writecr3(unsigned int);
596 void __writecr4(unsigned int);
597 void __writecr8(unsigned int);
598 void __writedr(unsigned int, unsigned int);
599 void __writeeflags(unsigned int);
600 void __writefsbyte(unsigned long, unsigned char);
601 void __writefsdword(unsigned long, unsigned long);
602 void __writefsword(unsigned long, unsigned short);
603 void _m_empty(void);
604 void _m_femms(void);
605 
/*
 * Disabled list of _m_* and __m64-based _mm_* prototypes inside the x86-only
 * section. Everything up to the matching `#endif // 0` is dropped by the
 * preprocessor, so none of these declarations is visible to includers.
 *
 * NOTE(review): presumably retained as a reference only; the include of
 * <mm3dnow.h> near the top of this file is commented out as well -- confirm
 * where (or whether) these are meant to be provided before re-enabling.
 */
606 #if 0
607 __m64 _m_from_float(float);
608 __m64 _m_from_int(int);
609 void _m_maskmovq(__m64, __m64, _Out_writes_bytes_(8) char *);
610 __m64 _m_packssdw(__m64, __m64);
611 __m64 _m_packsswb(__m64, __m64);
612 __m64 _m_packuswb(__m64, __m64);
613 __m64 _m_paddb(__m64, __m64);
614 __m64 _m_paddd(__m64, __m64);
615 __m64 _m_paddsb(__m64, __m64);
616 __m64 _m_paddsw(__m64, __m64);
617 __m64 _m_paddusb(__m64, __m64);
618 __m64 _m_paddusw(__m64, __m64);
619 __m64 _m_paddw(__m64, __m64);
620 __m64 _m_pand(__m64, __m64);
621 __m64 _m_pandn(__m64, __m64);
622 __m64 _m_pavgb(__m64, __m64);
623 __m64 _m_pavgusb(__m64, __m64);
624 __m64 _m_pavgw(__m64, __m64);
625 __m64 _m_pcmpeqb(__m64, __m64);
626 __m64 _m_pcmpeqd(__m64, __m64);
627 __m64 _m_pcmpeqw(__m64, __m64);
628 __m64 _m_pcmpgtb(__m64, __m64);
629 __m64 _m_pcmpgtd(__m64, __m64);
630 __m64 _m_pcmpgtw(__m64, __m64);
631 int _m_pextrw(__m64, int);
632 __m64 _m_pf2id(__m64);
633 __m64 _m_pf2iw(__m64);
634 __m64 _m_pfacc(__m64, __m64);
635 __m64 _m_pfadd(__m64, __m64);
636 __m64 _m_pfcmpeq(__m64, __m64);
637 __m64 _m_pfcmpge(__m64, __m64);
638 __m64 _m_pfcmpgt(__m64, __m64);
639 __m64 _m_pfmax(__m64, __m64);
640 __m64 _m_pfmin(__m64, __m64);
641 __m64 _m_pfmul(__m64, __m64);
642 __m64 _m_pfnacc(__m64, __m64);
643 __m64 _m_pfpnacc(__m64, __m64);
644 __m64 _m_pfrcp(__m64);
645 __m64 _m_pfrcpit1(__m64, __m64);
646 __m64 _m_pfrcpit2(__m64, __m64);
647 __m64 _m_pfrsqit1(__m64, __m64);
648 __m64 _m_pfrsqrt(__m64);
649 __m64 _m_pfsub(__m64, __m64);
650 __m64 _m_pfsubr(__m64, __m64);
651 __m64 _m_pi2fd(__m64);
652 __m64 _m_pi2fw(__m64);
653 __m64 _m_pinsrw(__m64, int, int);
654 __m64 _m_pmaddwd(__m64, __m64);
655 __m64 _m_pmaxsw(__m64, __m64);
656 __m64 _m_pmaxub(__m64, __m64);
657 __m64 _m_pminsw(__m64, __m64);
658 __m64 _m_pminub(__m64, __m64);
659 int _m_pmovmskb(__m64);
660 __m64 _m_pmulhrw(__m64, __m64);
661 __m64 _m_pmulhuw(__m64, __m64);
662 __m64 _m_pmulhw(__m64, __m64);
663 __m64 _m_pmullw(__m64, __m64);
664 __m64 _m_por(__m64, __m64);
665 __m64 _m_psadbw(__m64, __m64);
666 __m64 _m_pshufw(__m64, int);
667 __m64 _m_pslld(__m64, __m64);
668 __m64 _m_pslldi(__m64, int);
669 __m64 _m_psllq(__m64, __m64);
670 __m64 _m_psllqi(__m64, int);
671 __m64 _m_psllw(__m64, __m64);
672 __m64 _m_psllwi(__m64, int);
673 __m64 _m_psrad(__m64, __m64);
674 __m64 _m_psradi(__m64, int);
675 __m64 _m_psraw(__m64, __m64);
676 __m64 _m_psrawi(__m64, int);
677 __m64 _m_psrld(__m64, __m64);
678 __m64 _m_psrldi(__m64, int);
679 __m64 _m_psrlq(__m64, __m64);
680 __m64 _m_psrlqi(__m64, int);
681 __m64 _m_psrlw(__m64, __m64);
682 __m64 _m_psrlwi(__m64, int);
683 __m64 _m_psubb(__m64, __m64);
684 __m64 _m_psubd(__m64, __m64);
685 __m64 _m_psubsb(__m64, __m64);
686 __m64 _m_psubsw(__m64, __m64);
687 __m64 _m_psubusb(__m64, __m64);
688 __m64 _m_psubusw(__m64, __m64);
689 __m64 _m_psubw(__m64, __m64);
690 __m64 _m_pswapd(__m64);
691 __m64 _m_punpckhbw(__m64, __m64);
692 __m64 _m_punpckhdq(__m64, __m64);
693 __m64 _m_punpckhwd(__m64, __m64);
694 __m64 _m_punpcklbw(__m64, __m64);
695 __m64 _m_punpckldq(__m64, __m64);
696 __m64 _m_punpcklwd(__m64, __m64);
697 __m64 _m_pxor(__m64, __m64);
698 float _m_to_float(__m64);
699 int _m_to_int(__m64);
700 __m64 _mm_add_si64(__m64, __m64);
701 __m128 _mm_cvt_pi2ps(__m128, __m64);
702 __m64 _mm_cvt_ps2pi(__m128);
703 __m64 _mm_cvtpd_pi32(__m128d);
704 __m128d _mm_cvtpi32_pd(__m64);
705 __m64 _mm_cvtt_ps2pi(__m128);
706 __m64 _mm_cvttpd_pi32(__m128d);
707 __m64 _mm_mul_su32(__m64, __m64);
708 __m128i _mm_set1_epi64(__m64);
709 __m64 _mm_set1_pi16(short);
710 __m64 _mm_set1_pi32(int);
711 __m64 _mm_set1_pi8(char);
712 __m128i _mm_set_epi64(__m64, __m64);
713 __m64 _mm_set_pi16(short, short, short, short);
714 __m64 _mm_set_pi32(int, int);
715 __m64 _mm_set_pi8(char, char, char, char, char, char, char, char);
716 __m128i _mm_setr_epi64(__m64, __m64);
717 __m64 _mm_setr_pi16(short, short, short, short);
718 __m64 _mm_setr_pi32(int, int);
719 __m64 _mm_setr_pi8(char, char, char, char, char, char, char, char);
720 __m64 _mm_setzero_si64(void);
721 void _mm_stream_pi(__m64 *, __m64);
722 __m64 _mm_sub_si64(__m64, __m64);
723 #endif // 0
724 
725 #endif /* _M_IX86 */
726 
/*
 * x64-only intrinsic prototypes (guarded by _M_X64).
 * Only declarations appear in this section.
 * NOTE(review): the implementations presumably come from the compiler itself
 * or from the compiler-specific header included at the end of this file -
 * confirm.
 */
727 #if defined(_M_X64)
728 
/* 64-bit bit-scan prototypes. */
729 unsigned char _BitScanForward64(unsigned long * _Index, unsigned __int64 _Mask);
730 unsigned char _BitScanReverse64(unsigned long * _Index, unsigned __int64 _Mask);
/*
 * _np variants of the interlocked operations, plus the 128-bit
 * compare-exchange. Only _InterlockedCompareExchange128 carries the
 * _Interlocked_operand_ annotation in this group.
 * NOTE(review): MSVC documents the _np suffix as "no prefetch" - confirm.
 */
731 short _InterlockedAnd16_np(short volatile * _Value, short _Mask);
732 __int64 _InterlockedAnd64_np(__int64 volatile * _Value, __int64 _Mask);
733 char _InterlockedAnd8_np(char volatile * _Value, char _Mask);
734 long _InterlockedAnd_np(long volatile * _Value, long _Mask);
735 unsigned char _InterlockedCompareExchange128(_Interlocked_operand_ __int64 volatile * _Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 * _ComparandResult);
736 unsigned char _InterlockedCompareExchange128_np(__int64 volatile * _Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, __int64 * _ComparandResult);
737 short _InterlockedCompareExchange16_np(short volatile * _Destination, short _Exchange, short _Comparand);
738 __int64 _InterlockedCompareExchange64_np(__int64 volatile * _Destination, __int64 _Exchange, __int64 _Comparand);
739 void * _InterlockedCompareExchangePointer_np(void * volatile * _Destination, void * _Exchange, void * _Comparand);
740 long _InterlockedCompareExchange_np(long volatile * _Destination, long _Exchange, long _Comparand);
741 short _InterlockedOr16_np(short volatile * _Value, short _Mask);
742 __int64 _InterlockedOr64_np(__int64 volatile * _Value, __int64 _Mask);
743 char _InterlockedOr8_np(char volatile * _Value, char _Mask);
744 long _InterlockedOr_np(long volatile * _Value, long _Mask);
745 short _InterlockedXor16_np(short volatile * _Value, short _Mask);
746 __int64 _InterlockedXor64_np(__int64 volatile * _Value, __int64 _Mask);
747 char _InterlockedXor8_np(char volatile * _Value, char _Mask);
748 long _InterlockedXor_np(long volatile * _Value, long _Mask);
/*
 * __addgs, __incgs (and __readgs, __writegs further down) come in
 * byte/word/dword/qword widths. Each takes an unsigned long first argument;
 * presumably that is the offset relative to GS - confirm.
 */
749 void __addgsbyte(unsigned long, unsigned char);
750 void __addgsdword(unsigned long, unsigned long);
751 void __addgsqword(unsigned long, unsigned __int64);
752 void __addgsword(unsigned long, unsigned short);
753 void __faststorefence(void);
754 void __incgsbyte(unsigned long);
755 void __incgsdword(unsigned long);
756 void __incgsqword(unsigned long);
757 void __incgsword(unsigned long);
758 unsigned __int64 __lzcnt64(unsigned __int64);
/*
 * NOTE(review): __movsq spells its element type 'unsigned long long' while
 * __stosq below uses 'unsigned __int64'.
 */
759 void __movsq(unsigned long long *, unsigned long long const *, size_t);
760 __int64 __mulh(__int64, __int64);
761 __int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand, __int64 * _HighProduct);
762 unsigned __int64 _umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand, unsigned __int64 * _HighProduct);
763 unsigned __int64 __popcnt64(unsigned __int64);
/*
 * Register readers: cr0, cr2, cr3, cr4, cr8, a debug register selected by
 * an unsigned int, and eflags. The matching writers further down cover
 * cr0, cr3, cr4 and cr8; no __writecr2 is declared in this section.
 */
764 unsigned __int64 __readcr0(void);
765 unsigned __int64 __readcr2(void);
766 unsigned __int64 __readcr3(void);
767 unsigned __int64 __readcr4(void);
768 unsigned __int64 __readcr8(void);
769 unsigned __int64 __readdr(unsigned int);
770 unsigned __int64 __readeflags(void);
771 unsigned char __readgsbyte(unsigned long);
772 unsigned long __readgsdword(unsigned long);
773 unsigned __int64 __readgsqword(unsigned long);
774 unsigned short __readgsword(unsigned long);
775 unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, unsigned __int64 _HighPart, unsigned char _Shift);
776 unsigned __int64 __shiftright128(unsigned __int64 _LowPart, unsigned __int64 _HighPart, unsigned char _Shift);
777 void __stosq(unsigned __int64 *, unsigned __int64, size_t);
778 unsigned __int64 __umulh(unsigned __int64, unsigned __int64);
/* __vmx_ prototypes; every one of them returns unsigned char. */
779 unsigned char __vmx_on(unsigned __int64 *);
780 unsigned char __vmx_vmclear(unsigned __int64 *);
781 unsigned char __vmx_vmlaunch(void);
782 unsigned char __vmx_vmptrld(unsigned __int64 *);
783 unsigned char __vmx_vmread(size_t, size_t *);
784 unsigned char __vmx_vmresume(void);
785 unsigned char __vmx_vmwrite(size_t, size_t);
/* Register writers (see the note at the readers above). */
786 void __writecr0(unsigned __int64);
787 void __writecr3(unsigned __int64);
788 void __writecr4(unsigned __int64);
789 void __writecr8(unsigned __int64);
790 void __writedr(unsigned int, unsigned __int64);
791 void __writeeflags(unsigned __int64);
792 void __writegsbyte(unsigned long, unsigned char);
793 void __writegsdword(unsigned long, unsigned long);
794 void __writegsqword(unsigned long, unsigned __int64);
795 void __writegsword(unsigned long, unsigned short);
/* 64-bit bit-test prototypes, including two interlocked forms. */
796 unsigned char _bittest64(__int64 const *, __int64);
797 unsigned char _bittestandcomplement64(__int64 *, __int64);
798 unsigned char _bittestandreset64(__int64 *, __int64);
799 unsigned char _bittestandset64(__int64 *, __int64);
800 unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);
801 unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
802 unsigned __int64 _mm_crc32_u64(unsigned __int64, unsigned __int64);
/*
 * The 64-bit _mm_ prototypes below are compiled out by '#if 0'.
 * NOTE(review): presumably they are left to the mmintrin-family headers
 * included at the top of this file - confirm before re-enabling.
 * NOTE(review): _mm_popcnt_u64 is declared here as returning signed
 * __int64, whereas __popcnt64 above returns unsigned __int64.
 */
803 #if 0
804 __int64 _mm_cvtsd_si64(__m128d);
805 __int64 _mm_cvtsd_si64x(__m128d);
806 __int64 _mm_cvtsi128_si64(__m128i);
807 __int64 _mm_cvtsi128_si64x(__m128i);
808 __m128d _mm_cvtsi64_sd(__m128d, __int64);
809 __m128i _mm_cvtsi64_si128(__int64);
810 __m128 _mm_cvtsi64_ss(__m128, __int64);
811 __m128d _mm_cvtsi64x_sd(__m128d, __int64);
812 __m128i _mm_cvtsi64x_si128(__int64);
813 __m128 _mm_cvtsi64x_ss(__m128, __int64);
814 __int64 _mm_cvtss_si64(__m128);
815 __int64 _mm_cvtss_si64x(__m128);
816 __int64 _mm_cvttsd_si64(__m128d);
817 __int64 _mm_cvttsd_si64x(__m128d);
818 __int64 _mm_cvttss_si64(__m128);
819 __int64 _mm_cvttss_si64x(__m128);
820 __int64 _mm_extract_epi64(__m128i, int);
821 __m128i _mm_insert_epi64(__m128i, __int64, int);
822 __int64 _mm_popcnt_u64(unsigned __int64);
823 __m128i _mm_set1_epi64x(__int64);
824 __m128i _mm_set_epi64x(__int64, __int64);
825 void _mm_stream_si64x(__int64 *, __int64);
826 #endif
827 #endif /* _M_X64 */
828 
/*
 * 64-bit interlocked prototypes shared by ARM and x64 targets: And,
 * Decrement, Exchange, ExchangeAdd, Increment, Or and Xor. All of them take
 * an _Interlocked_operand_-annotated volatile __int64 pointer and return
 * __int64. _InterlockedCompareExchange64 is not repeated here; it is
 * declared without an architecture guard near the top of this file.
 */
829 #if defined(_M_ARM) || defined(_M_X64)
830 
831 __int64 _InterlockedAnd64(_Interlocked_operand_ __int64 volatile * _Value, __int64 _Mask);
832 __int64 _InterlockedDecrement64(_Interlocked_operand_ __int64 volatile * _Addend);
833 __int64 _InterlockedExchange64(_Interlocked_operand_ __int64 volatile * _Target, __int64 _Value);
834 __int64 _InterlockedExchangeAdd64(_Interlocked_operand_ __int64 volatile * _Addend, __int64 _Value);
835 __int64 _InterlockedIncrement64(_Interlocked_operand_ __int64 volatile * _Addend);
836 __int64 _InterlockedOr64(_Interlocked_operand_ __int64 volatile * _Value, __int64 _Mask);
837 __int64 _InterlockedXor64(_Interlocked_operand_ __int64 volatile * _Value, __int64 _Mask);
838 
839 #endif /* _M_ARM || _M_X64 */
840 
/*
 * ARM-only intrinsic prototypes (guarded by _M_ARM). Declarations only.
 *
 * Most _Interlocked families below are declared in three suffixed forms:
 * _acq, _nf and _rel. NOTE(review): MSVC's ARM intrinsics documentation
 * describes these as acquire, no-fence and release orderings - confirm.
 * The Exchange families (_InterlockedExchange, its 8/16/64-bit forms and
 * _InterlockedExchangePointer) are declared with _acq and _nf only; no _rel
 * form appears in this section.
 */
841 #if defined(_M_ARM)
842 
843 int _AddSatInt(int, int);
844 double _CopyDoubleFromInt64(__int64);
845 float _CopyFloatFromInt32(__int32);
846 __int32 _CopyInt32FromFloat(float);
847 __int64 _CopyInt64FromDouble(double);
848 unsigned int _CountLeadingOnes(unsigned long);
849 unsigned int _CountLeadingOnes64(unsigned __int64);
850 unsigned int _CountLeadingSigns(long);
851 unsigned int _CountLeadingSigns64(__int64);
852 unsigned int _CountLeadingZeros(unsigned long);
853 unsigned int _CountLeadingZeros64(unsigned __int64);
854 unsigned int _CountOneBits(unsigned long);
855 unsigned int _CountOneBits64(unsigned __int64);
856 int _DAddSatInt(int, int);
857 int _DSubSatInt(int, int);
/*
 * The unsuffixed _InterlockedAdd and _InterlockedAdd64 carry the
 * _Interlocked_operand_ annotation; the suffixed variants that follow do
 * not, with the single exception of _InterlockedXor64_rel further down.
 */
858 long _InterlockedAdd(_Interlocked_operand_ long volatile * _Addend, long _Value);
859 __int64 _InterlockedAdd64(_Interlocked_operand_ __int64 volatile * _Addend, __int64 _Value);
860 __int64 _InterlockedAdd64_acq(__int64 volatile * _Addend, __int64 _Value);
861 __int64 _InterlockedAdd64_nf(__int64 volatile * _Addend, __int64 _Value);
862 __int64 _InterlockedAdd64_rel(__int64 volatile * _Addend, __int64 _Value);
863 long _InterlockedAdd_acq(long volatile * _Addend, long _Value);
864 long _InterlockedAdd_nf(long volatile * _Addend, long _Value);
865 long _InterlockedAdd_rel(long volatile * _Addend, long _Value);
866 short _InterlockedAnd16_acq(short volatile * _Value, short _Mask);
867 short _InterlockedAnd16_nf(short volatile * _Value, short _Mask);
868 short _InterlockedAnd16_rel(short volatile * _Value, short _Mask);
869 __int64 _InterlockedAnd64_acq(__int64 volatile * _Value, __int64 _Mask);
870 __int64 _InterlockedAnd64_nf(__int64 volatile * _Value, __int64 _Mask);
871 __int64 _InterlockedAnd64_rel(__int64 volatile * _Value, __int64 _Mask);
872 char _InterlockedAnd8_acq(char volatile * _Value, char _Mask);
873 char _InterlockedAnd8_nf(char volatile * _Value, char _Mask);
874 char _InterlockedAnd8_rel(char volatile * _Value, char _Mask);
875 long _InterlockedAnd_acq(long volatile * _Value, long _Mask);
876 long _InterlockedAnd_nf(long volatile * _Value, long _Mask);
877 long _InterlockedAnd_rel(long volatile * _Value, long _Mask);
878 short _InterlockedCompareExchange16_acq(short volatile * _Destination, short _Exchange, short _Comparand);
879 short _InterlockedCompareExchange16_nf(short volatile * _Destination, short _Exchange, short _Comparand);
880 short _InterlockedCompareExchange16_rel(short volatile * _Destination, short _Exchange, short _Comparand);
881 __int64 _InterlockedCompareExchange64_acq(__int64 volatile * _Destination, __int64 _Exchange, __int64 _Comparand);
882 __int64 _InterlockedCompareExchange64_nf(__int64 volatile * _Destination, __int64 _Exchange, __int64 _Comparand);
883 __int64 _InterlockedCompareExchange64_rel(__int64 volatile * _Destination, __int64 _Exchange, __int64 _Comparand);
884 char _InterlockedCompareExchange8_acq(char volatile * _Destination, char _Exchange, char _Comparand);
885 char _InterlockedCompareExchange8_nf(char volatile * _Destination, char _Exchange, char _Comparand);
886 char _InterlockedCompareExchange8_rel(char volatile * _Destination, char _Exchange, char _Comparand);
887 void * _InterlockedCompareExchangePointer_acq(void * volatile * _Destination, void * _Exchange, void * _Comparand);
888 void * _InterlockedCompareExchangePointer_nf(void * volatile * _Destination, void * _Exchange, void * _Comparand);
889 void * _InterlockedCompareExchangePointer_rel(void * volatile * _Destination, void * _Exchange, void * _Comparand);
890 long _InterlockedCompareExchange_acq(long volatile * _Destination, long _Exchange, long _Comparand);
891 long _InterlockedCompareExchange_nf(long volatile * _Destination, long _Exchange, long _Comparand);
892 long _InterlockedCompareExchange_rel(long volatile * _Destination, long _Exchange, long _Comparand);
893 short _InterlockedDecrement16_acq(short volatile * _Addend);
894 short _InterlockedDecrement16_nf(short volatile * _Addend);
895 short _InterlockedDecrement16_rel(short volatile * _Addend);
896 __int64 _InterlockedDecrement64_acq(__int64 volatile * _Addend);
897 __int64 _InterlockedDecrement64_nf(__int64 volatile * _Addend);
898 __int64 _InterlockedDecrement64_rel(__int64 volatile * _Addend);
899 long _InterlockedDecrement_acq(long volatile * _Addend);
900 long _InterlockedDecrement_nf(long volatile * _Addend);
901 long _InterlockedDecrement_rel(long volatile * _Addend);
/* Exchange16/64/8: _acq and _nf only (no _rel declared here). */
902 short _InterlockedExchange16_acq(short volatile * _Target, short _Value);
903 short _InterlockedExchange16_nf(short volatile * _Target, short _Value);
904 __int64 _InterlockedExchange64_acq(__int64 volatile * _Target, __int64 _Value);
905 __int64 _InterlockedExchange64_nf(__int64 volatile * _Target, __int64 _Value);
906 char _InterlockedExchange8_acq(char volatile * _Target, char _Value);
907 char _InterlockedExchange8_nf(char volatile * _Target, char _Value);
908 short _InterlockedExchangeAdd16_acq(short volatile * _Addend, short _Value);
909 short _InterlockedExchangeAdd16_nf(short volatile * _Addend, short _Value);
910 short _InterlockedExchangeAdd16_rel(short volatile * _Addend, short _Value);
911 __int64 _InterlockedExchangeAdd64_acq(__int64 volatile * _Addend, __int64 _Value);
912 __int64 _InterlockedExchangeAdd64_nf(__int64 volatile * _Addend, __int64 _Value);
913 __int64 _InterlockedExchangeAdd64_rel(__int64 volatile * _Addend, __int64 _Value);
914 char _InterlockedExchangeAdd8_acq(char volatile * _Addend, char _Value);
915 char _InterlockedExchangeAdd8_nf(char volatile * _Addend, char _Value);
916 char _InterlockedExchangeAdd8_rel(char volatile * _Addend, char _Value);
917 long _InterlockedExchangeAdd_acq(long volatile * _Addend, long _Value);
918 long _InterlockedExchangeAdd_nf(long volatile * _Addend, long _Value);
919 long _InterlockedExchangeAdd_rel(long volatile * _Addend, long _Value);
/* ExchangePointer and Exchange: _acq and _nf only (no _rel declared here). */
920 void * _InterlockedExchangePointer_acq(void * volatile * _Target, void * _Value);
921 void * _InterlockedExchangePointer_nf(void * volatile * _Target, void * _Value);
922 long _InterlockedExchange_acq(long volatile * _Target, long _Value);
923 long _InterlockedExchange_nf(long volatile * _Target, long _Value);
924 short _InterlockedIncrement16_acq(short volatile * _Addend);
925 short _InterlockedIncrement16_nf(short volatile * _Addend);
926 short _InterlockedIncrement16_rel(short volatile * _Addend);
927 __int64 _InterlockedIncrement64_acq(__int64 volatile * _Addend);
928 __int64 _InterlockedIncrement64_nf(__int64 volatile * _Addend);
929 __int64 _InterlockedIncrement64_rel(__int64 volatile * _Addend);
930 long _InterlockedIncrement_acq(long volatile * _Addend);
931 long _InterlockedIncrement_nf(long volatile * _Addend);
932 long _InterlockedIncrement_rel(long volatile * _Addend);
933 short _InterlockedOr16_acq(short volatile * _Value, short _Mask);
934 short _InterlockedOr16_nf(short volatile * _Value, short _Mask);
935 short _InterlockedOr16_rel(short volatile * _Value, short _Mask);
936 __int64 _InterlockedOr64_acq(__int64 volatile * _Value, __int64 _Mask);
937 __int64 _InterlockedOr64_nf(__int64 volatile * _Value, __int64 _Mask);
938 __int64 _InterlockedOr64_rel(__int64 volatile * _Value, __int64 _Mask);
939 char _InterlockedOr8_acq(char volatile * _Value, char _Mask);
940 char _InterlockedOr8_nf(char volatile * _Value, char _Mask);
941 char _InterlockedOr8_rel(char volatile * _Value, char _Mask);
942 long _InterlockedOr_acq(long volatile * _Value, long _Mask);
943 long _InterlockedOr_nf(long volatile * _Value, long _Mask);
944 long _InterlockedOr_rel(long volatile * _Value, long _Mask);
945 short _InterlockedXor16_acq(short volatile * _Value, short _Mask);
946 short _InterlockedXor16_nf(short volatile * _Value, short _Mask);
947 short _InterlockedXor16_rel(short volatile * _Value, short _Mask);
948 __int64 _InterlockedXor64_acq(__int64 volatile * _Value, __int64 _Mask);
949 __int64 _InterlockedXor64_nf(__int64 volatile * _Value, __int64 _Mask);
/*
 * NOTE(review): _InterlockedXor64_rel is the only _acq/_nf/_rel variant in
 * this section annotated with _Interlocked_operand_ - confirm against the
 * upstream header whether that is intended.
 */
950 __int64 _InterlockedXor64_rel(_Interlocked_operand_ __int64 volatile * _Value, __int64 _Mask);
951 char _InterlockedXor8_acq(char volatile * _Value, char _Mask);
952 char _InterlockedXor8_nf(char volatile * _Value, char _Mask);
953 char _InterlockedXor8_rel(char volatile * _Value, char _Mask);
954 long _InterlockedXor_acq(long volatile * _Value, long _Mask);
955 long _InterlockedXor_nf(long volatile * _Value, long _Mask);
956 long _InterlockedXor_rel(long volatile * _Value, long _Mask);
/*
 * The return type of _MoveFromCoprocessor and _MoveFromCoprocessor2 depends
 * on the compiler build: unsigned int when _MSC_FULL_VER >= 170040825, int
 * otherwise. If _MSC_FULL_VER is not defined, the preprocessor evaluates it
 * as 0 and the int declarations are selected.
 */
957 #if _MSC_FULL_VER >= 170040825
958 unsigned int _MoveFromCoprocessor(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int);
959 unsigned int _MoveFromCoprocessor2(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int);
960 #else
961 int _MoveFromCoprocessor(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int);
962 int _MoveFromCoprocessor2(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int);
963 #endif
964 unsigned __int64 _MoveFromCoprocessor64(unsigned int, unsigned int, unsigned int);
965 void _MoveToCoprocessor(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int);
966 void _MoveToCoprocessor2(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int);
967 void _MoveToCoprocessor64(unsigned __int64, unsigned int, unsigned int, unsigned int);
968 long _MulHigh(long, long);
969 unsigned long _MulUnsignedHigh(unsigned long, unsigned long);
970 int _ReadStatusReg(int);
971 int _SubSatInt(int, int);
972 void _WriteStatusReg(int, int, int);
973 void __emit(unsigned __int32);
974 __int64 __ldrexd(__int64 volatile *);
975 void __cdecl __prefetch(const void *);
976 unsigned __int64 __rdpmccntr64(void);
977 void __sev(void);
978 void __static_assert(int, const char *);
/* __swi, __hvc and __trap are variadic after their first argument. */
979 unsigned int __swi(unsigned int, ...);
980 unsigned int __hvc(unsigned int, ...);
981 int __trap(int, ...);
/* __iso_volatile_load and __iso_volatile_store for 8, 16, 32 and 64-bit objects. */
982 __int16 __iso_volatile_load16(const volatile __int16 *);
983 __int32 __iso_volatile_load32(const volatile __int32 *);
984 __int64 __iso_volatile_load64(const volatile __int64 *);
985 __int8 __iso_volatile_load8(const volatile __int8 *);
986 void __iso_volatile_store16(volatile __int16 *, __int16);
987 void __iso_volatile_store32(volatile __int32 *, __int32);
988 void __iso_volatile_store64(volatile __int64 *, __int64);
989 void __iso_volatile_store8(volatile __int8 *, __int8);
990 void __wfe(void);
991 void __wfi(void);
992 void __yield(void);
/* Suffixed interlocked bit test-and-reset / test-and-set on a volatile long. */
993 unsigned char _interlockedbittestandreset_acq(long volatile *, long);
994 unsigned char _interlockedbittestandreset_nf(long volatile *, long);
995 unsigned char _interlockedbittestandreset_rel(long volatile *, long);
996 unsigned char _interlockedbittestandset_acq(long volatile *, long);
997 unsigned char _interlockedbittestandset_nf(long volatile *, long);
998 unsigned char _interlockedbittestandset_rel(long volatile *, long);
999 int _isunordered(double, double);
1000 int _isunorderedf(float, float);
1001 
1002 #endif /* _M_ARM */
1003 
/*
 * Redeclares three 32-bit interlocked intrinsics when _M_CEE_PURE is
 * defined. Unlike the declarations of _InterlockedCompareExchange and
 * _InterlockedDecrement near the top of this file, the ones here do not
 * specify __cdecl.
 * NOTE(review): Microsoft documents _M_CEE_PURE as the predefined macro for
 * /clr:pure compilation - confirm that is the intended trigger.
 */
1004 #if defined(_M_CEE_PURE)
1005 
1006 long _InterlockedCompareExchange(_Interlocked_operand_ long volatile * _Destination, long _Exchange, long _Comparand);
1007 long _InterlockedDecrement(_Interlocked_operand_ long volatile * _Addend);
1008 long _InterlockedIncrement(_Interlocked_operand_ long volatile * _Addend);
1009 
1010 #endif /* _M_CEE_PURE */
1011 
1012 #if defined(__cplusplus)
1013 }
1014 #endif /* __cplusplus */
1015 
/*
 * Pull in the compiler-specific intrinsics header:
 *   - GCC or Clang with _WIN32 defined -> "mingw32/intrin.h"
 *   - otherwise, any compiler defining _MSC_VER -> "msc/intrin.h"
 *   - anything else stops the build with #error.
 * The GCC/Clang test comes first, so a compiler that defines __clang__ and
 * _WIN32 takes the first branch even if it also defines _MSC_VER.
 */
1016 #if (defined(__GNUC__) || defined(__clang__)) && defined(_WIN32) // We can't use __MINGW32__ here
1017 #  include "mingw32/intrin.h"
1018 #elif defined(_MSC_VER)
1019 #  include "msc/intrin.h"
1020 #else
1021 #  error Please implement intrinsics for your target compiler
1022 #endif
1023 
1024 #endif
1025