/*
	Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
	Microsoft Visual C++ functions. Originally developed for the ReactOS
	(<https://reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
	projects.

	Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>

	Permission is hereby granted, free of charge, to any person obtaining a
	copy of this software and associated documentation files (the "Software"),
	to deal in the Software without restriction, including without limitation
	the rights to use, copy, modify, merge, publish, distribute, sublicense,
	and/or sell copies of the Software, and to permit persons to whom the
	Software is furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in
	all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
	DEALINGS IN THE SOFTWARE.
*/

#ifndef KJK_INTRIN_X86_H_
#define KJK_INTRIN_X86_H_

/*
	FIXME: review all "memory" clobbers, add/remove to match Visual C++
	behavior: some "obvious" memory barriers are not present in the Visual C++
	implementation - e.g. __stosX; on the other hand, some memory barriers that
	*are* present could have been missed
*/

/*
	NOTE: this is a *compatibility* header. Some functions may look wrong at
	first, but they're only "as wrong" as they would be on Visual C++. Our
	priority is compatibility

	NOTE: unlike most people who write inline asm for GCC, I didn't pull the
	constraints and the uses of __volatile__ out of my... hat. Do not touch
	them. I hate cargo cult programming

	NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
	barriers aren't there in Visual C++ (e.g. __stosX)

	NOTE: review all intrinsics with a return value, add/remove __volatile__
	where necessary. If an intrinsic whose value is ignored generates a no-op
	under Visual C++, __volatile__ must be omitted; if it always generates code
	(for example, if it has side effects), __volatile__ must be specified. GCC
	will only optimize out non-volatile asm blocks with outputs, so input-only
	blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
	and follow Visual C++ behavior

	NOTE: on GCC versions newer than 4.1.0, please use the __sync_* built-ins
	for barriers and atomic operations. Test the version like this:

	#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
		...

	Pay attention to the type of barrier. Make it match with what Visual C++
	would use in the same case
*/
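
/*
	Illustration only (not part of the original header): under the policy
	above, an intrinsic that Visual C++ compiles to nothing when its result is
	unused gets a plain __asm__ so GCC may drop it, while one that always has
	side effects gets __asm__ __volatile__. A hypothetical sketch (lo, hi and
	reg are placeholder names):

		__asm__("rdmsr" : "=a" (lo), "=d" (hi) : "c" (reg));
		__asm__ __volatile__("wrmsr" : : "a" (lo), "d" (hi), "c" (reg));
*/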

#ifdef __cplusplus
extern "C" {
#endif

/*** memcpy must be memmove ***/
void* __cdecl memmove(void* dest, const void* source, size_t num);
__INTRIN_INLINE void* __cdecl memcpy(void* dest, const void* source, size_t num)
{
    return memmove(dest, source, num);
}


/*** Stack frame juggling ***/
#define _ReturnAddress() (__builtin_return_address(0))
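/* NOTE: assumes a conventional stack frame: __builtin_frame_address(0) points
   at the saved frame pointer, so the return address sits in the slot just
   above it */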
#define _AddressOfReturnAddress() (&(((void **)(__builtin_frame_address(0)))[1]))
/* TODO: __getcallerseflags but how??? */

/*** Memory barriers ***/

#if !HAS_BUILTIN(_ReadWriteBarrier)
__INTRIN_INLINE void _ReadWriteBarrier(void)
{
	__asm__ __volatile__("" : : : "memory");
}
#endif

/* GCC only supports full barriers */
#define _ReadBarrier _ReadWriteBarrier
#define _WriteBarrier _ReadWriteBarrier

#if !HAS_BUILTIN(_mm_mfence) && !defined(__clang__)
__INTRIN_INLINE void _mm_mfence(void)
{
	__asm__ __volatile__("mfence" : : : "memory");
}
#endif

#if !HAS_BUILTIN(_mm_lfence) && !defined(__clang__)
__INTRIN_INLINE void _mm_lfence(void)
{
	_ReadBarrier();
	__asm__ __volatile__("lfence");
	_ReadBarrier();
}
#endif

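/* NOTE: a locked read-modify-write on the stack acts as a full memory barrier
   and is typically cheaper than an mfence, which is why __faststorefence
   avoids the mfence instruction */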
#if defined(__x86_64__) && !HAS_BUILTIN(__faststorefence)
__INTRIN_INLINE void __faststorefence(void)
{
	long local;
	__asm__ __volatile__("lock; orl $0, %0;" : : "m"(local));
}
#endif


/*** Atomic operations ***/

#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100

#if !HAS_BUILTIN(_InterlockedCompareExchange8)
__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, char Exchange, char Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}
#endif

#if !HAS_BUILTIN(_InterlockedCompareExchange16)
__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, short Exchange, short Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}
#endif

#if !HAS_BUILTIN(_InterlockedCompareExchange)
__INTRIN_INLINE long __cdecl _InterlockedCompareExchange(volatile long * Destination, long Exchange, long Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}
#endif

#if !HAS_BUILTIN(_InterlockedCompareExchangePointer)
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * Exchange, void * Comperand)
{
	return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
}
#endif

#if !HAS_BUILTIN(_InterlockedExchange8)
__INTRIN_INLINE char _InterlockedExchange8(volatile char * Target, char Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
#endif

#if !HAS_BUILTIN(_InterlockedExchange16)
__INTRIN_INLINE short _InterlockedExchange16(volatile short * Target, short Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
#endif

#if !HAS_BUILTIN(_InterlockedExchange)
__INTRIN_INLINE long __cdecl _InterlockedExchange(volatile long * Target, long Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
#endif

#if !HAS_BUILTIN(_InterlockedExchangePointer)
__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return (void *)__sync_lock_test_and_set(Target, Value);
}
#endif

#if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedExchange64)
__INTRIN_INLINE long long _InterlockedExchange64(volatile long long * Target, long long Value)
{
	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
	__sync_synchronize();
	return __sync_lock_test_and_set(Target, Value);
}
#endif

#if !HAS_BUILTIN(_InterlockedExchangeAdd8)
__INTRIN_INLINE char _InterlockedExchangeAdd8(char volatile * Addend, char Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
#endif

#if !HAS_BUILTIN(_InterlockedExchangeAdd16)
__INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * Addend, short Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
#endif

#if !HAS_BUILTIN(_InterlockedExchangeAdd)
__INTRIN_INLINE long __cdecl _InterlockedExchangeAdd(volatile long * Addend, long Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
#endif

#if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedExchangeAdd64)
__INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * Addend, long long Value)
{
	return __sync_fetch_and_add(Addend, Value);
}
#endif

#if !HAS_BUILTIN(_InterlockedAnd8)
__INTRIN_INLINE char _InterlockedAnd8(volatile char * value, char mask)
{
	return __sync_fetch_and_and(value, mask);
}
#endif

#if !HAS_BUILTIN(_InterlockedAnd16)
__INTRIN_INLINE short _InterlockedAnd16(volatile short * value, short mask)
{
	return __sync_fetch_and_and(value, mask);
}
#endif

#if !HAS_BUILTIN(_InterlockedAnd)
__INTRIN_INLINE long _InterlockedAnd(volatile long * value, long mask)
{
	return __sync_fetch_and_and(value, mask);
}
#endif

#if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedAnd64)
__INTRIN_INLINE long long _InterlockedAnd64(volatile long long * value, long long mask)
{
	return __sync_fetch_and_and(value, mask);
}
#endif

#if !HAS_BUILTIN(_InterlockedOr8)
__INTRIN_INLINE char _InterlockedOr8(volatile char * value, char mask)
{
	return __sync_fetch_and_or(value, mask);
}
#endif

#if !HAS_BUILTIN(_InterlockedOr16)
__INTRIN_INLINE short _InterlockedOr16(volatile short * value, short mask)
{
	return __sync_fetch_and_or(value, mask);
}
#endif

#if !HAS_BUILTIN(_InterlockedOr)
__INTRIN_INLINE long _InterlockedOr(volatile long * value, long mask)
{
	return __sync_fetch_and_or(value, mask);
}
#endif

#if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedOr64)
__INTRIN_INLINE long long _InterlockedOr64(volatile long long * value, long long mask)
{
	return __sync_fetch_and_or(value, mask);
}
#endif

#if !HAS_BUILTIN(_InterlockedXor8)
__INTRIN_INLINE char _InterlockedXor8(volatile char * value, char mask)
{
	return __sync_fetch_and_xor(value, mask);
}
#endif

#if !HAS_BUILTIN(_InterlockedXor16)
__INTRIN_INLINE short _InterlockedXor16(volatile short * value, short mask)
{
	return __sync_fetch_and_xor(value, mask);
}
#endif

#if !HAS_BUILTIN(_InterlockedXor)
__INTRIN_INLINE long _InterlockedXor(volatile long * value, long mask)
{
	return __sync_fetch_and_xor(value, mask);
}
#endif

#if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedXor64)
__INTRIN_INLINE long long _InterlockedXor64(volatile long long * value, long long mask)
{
	return __sync_fetch_and_xor(value, mask);
}
#endif

#if !HAS_BUILTIN(_InterlockedDecrement)
__INTRIN_INLINE long __cdecl _InterlockedDecrement(volatile long * lpAddend)
{
	return __sync_sub_and_fetch(lpAddend, 1);
}
#endif

#if !HAS_BUILTIN(_InterlockedIncrement)
__INTRIN_INLINE long __cdecl _InterlockedIncrement(volatile long * lpAddend)
{
	return __sync_add_and_fetch(lpAddend, 1);
}
#endif

#if !HAS_BUILTIN(_InterlockedDecrement16)
__INTRIN_INLINE short _InterlockedDecrement16(volatile short * lpAddend)
{
	return __sync_sub_and_fetch(lpAddend, 1);
}
#endif

#if !HAS_BUILTIN(_InterlockedIncrement16)
__INTRIN_INLINE short _InterlockedIncrement16(volatile short * lpAddend)
{
	return __sync_add_and_fetch(lpAddend, 1);
}
#endif

#if defined(__x86_64__)
#if !HAS_BUILTIN(_InterlockedDecrement64)
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * lpAddend)
{
	return __sync_sub_and_fetch(lpAddend, 1);
}
#endif

#if !HAS_BUILTIN(_InterlockedIncrement64)
__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * lpAddend)
{
	return __sync_add_and_fetch(lpAddend, 1);
}
#endif
#endif

#else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */

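/* Fallback implementations for GCC versions without the __sync built-ins,
   written directly in inline assembly */
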
#if !HAS_BUILTIN(_InterlockedCompareExchange8)
__INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, char Exchange, char Comperand)
{
	char retval = Comperand;
	__asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
#endif

#if !HAS_BUILTIN(_InterlockedCompareExchange16)
__INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, short Exchange, short Comperand)
{
	short retval = Comperand;
	__asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
#endif

#if !HAS_BUILTIN(_InterlockedCompareExchange)
__INTRIN_INLINE long _InterlockedCompareExchange(volatile long * Destination, long Exchange, long Comperand)
{
	long retval = Comperand;
	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
#endif

#if !HAS_BUILTIN(_InterlockedCompareExchangePointer)
__INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * Exchange, void * Comperand)
{
	void * retval = (void *)Comperand;
	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
	return retval;
}
#endif

#if !HAS_BUILTIN(_InterlockedExchange8)
__INTRIN_INLINE char _InterlockedExchange8(volatile char * Target, char Value)
{
	char retval = Value;
	__asm__("xchgb %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}
#endif

#if !HAS_BUILTIN(_InterlockedExchange16)
__INTRIN_INLINE short _InterlockedExchange16(volatile short * Target, short Value)
{
	short retval = Value;
	__asm__("xchgw %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}
#endif

#if !HAS_BUILTIN(_InterlockedExchange)
__INTRIN_INLINE long _InterlockedExchange(volatile long * Target, long Value)
{
	long retval = Value;
	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}
#endif

#if !HAS_BUILTIN(_InterlockedExchangePointer)
__INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * Value)
{
	void * retval = Value;
	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
	return retval;
}
#endif

#if !HAS_BUILTIN(_InterlockedExchangeAdd8)
__INTRIN_INLINE char _InterlockedExchangeAdd8(char volatile * Addend, char Value)
{
	char retval = Value;
	__asm__("lock; xaddb %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval;
}
#endif

#if !HAS_BUILTIN(_InterlockedExchangeAdd16)
__INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * Addend, short Value)
{
	short retval = Value;
	__asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval;
}
#endif

#if !HAS_BUILTIN(_InterlockedExchangeAdd)
__INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * Addend, long Value)
{
	long retval = Value;
	__asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval;
}
#endif

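/* These need the previous value, which a plain lock and/or/xor cannot return,
   so they are emulated with a compare-exchange loop */
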
#if !HAS_BUILTIN(_InterlockedAnd8)
__INTRIN_INLINE char _InterlockedAnd8(volatile char * value, char mask)
{
	char x;
	char y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange8(value, x & mask, x);
	}
	while(y != x);

	return y;
}
#endif

#if !HAS_BUILTIN(_InterlockedAnd16)
__INTRIN_INLINE short _InterlockedAnd16(volatile short * value, short mask)
{
	short x;
	short y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange16(value, x & mask, x);
	}
	while(y != x);

	return y;
}
#endif

#if !HAS_BUILTIN(_InterlockedAnd)
__INTRIN_INLINE long _InterlockedAnd(volatile long * value, long mask)
{
	long x;
	long y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange(value, x & mask, x);
	}
	while(y != x);

	return y;
}
#endif

#if !HAS_BUILTIN(_InterlockedOr8)
__INTRIN_INLINE char _InterlockedOr8(volatile char * value, char mask)
{
	char x;
	char y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange8(value, x | mask, x);
	}
	while(y != x);

	return y;
}
#endif

#if !HAS_BUILTIN(_InterlockedOr16)
__INTRIN_INLINE short _InterlockedOr16(volatile short * value, short mask)
{
	short x;
	short y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange16(value, x | mask, x);
	}
	while(y != x);

	return y;
}
#endif

#if !HAS_BUILTIN(_InterlockedOr)
__INTRIN_INLINE long _InterlockedOr(volatile long * value, long mask)
{
	long x;
	long y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange(value, x | mask, x);
	}
	while(y != x);

	return y;
}
#endif

#if !HAS_BUILTIN(_InterlockedXor8)
__INTRIN_INLINE char _InterlockedXor8(volatile char * value, char mask)
{
	char x;
	char y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange8(value, x ^ mask, x);
	}
	while(y != x);

	return y;
}
#endif

#if !HAS_BUILTIN(_InterlockedXor16)
__INTRIN_INLINE short _InterlockedXor16(volatile short * value, short mask)
{
	short x;
	short y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange16(value, x ^ mask, x);
	}
	while(y != x);

	return y;
}
#endif

#if !HAS_BUILTIN(_InterlockedXor)
__INTRIN_INLINE long _InterlockedXor(volatile long * value, long mask)
{
	long x;
	long y;

	y = *value;

	do
	{
		x = y;
		y = _InterlockedCompareExchange(value, x ^ mask, x);
	}
	while(y != x);

	return y;
}
#endif

#if !HAS_BUILTIN(_InterlockedDecrement)
__INTRIN_INLINE long _InterlockedDecrement(volatile long * lpAddend)
{
	return _InterlockedExchangeAdd(lpAddend, -1) - 1;
}
#endif

#if !HAS_BUILTIN(_InterlockedIncrement)
__INTRIN_INLINE long _InterlockedIncrement(volatile long * lpAddend)
{
	return _InterlockedExchangeAdd(lpAddend, 1) + 1;
}
#endif

#if !HAS_BUILTIN(_InterlockedDecrement16)
__INTRIN_INLINE short _InterlockedDecrement16(volatile short * lpAddend)
{
	return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
}
#endif

#if !HAS_BUILTIN(_InterlockedIncrement16)
__INTRIN_INLINE short _InterlockedIncrement16(volatile short * lpAddend)
{
	return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
}
#endif

#if defined(__x86_64__)
#if !HAS_BUILTIN(_InterlockedDecrement64)
__INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * lpAddend)
{
	return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
}
#endif

#if !HAS_BUILTIN(_InterlockedIncrement64)
__INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * lpAddend)
{
	return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
}
#endif
#endif

#endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */

#if !HAS_BUILTIN(_InterlockedCompareExchange64)
#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)

__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, long long Exchange, long long Comperand)
{
	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
}

#else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__) */
__INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, long long Exchange, long long Comperand)
{
	long long retval = Comperand;

	__asm__
	(
		"lock; cmpxchg8b %[Destination]" :
		[retval] "+A" (retval) :
			[Destination] "m" (*Destination),
			"b" ((unsigned long)((Exchange >>  0) & 0xFFFFFFFF)),
			"c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
		"memory"
	);

	return retval;
}
#endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__) */
#endif /* !HAS_BUILTIN(_InterlockedCompareExchange64) */

#if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedCompareExchange128)
__INTRIN_INLINE unsigned char _InterlockedCompareExchange128(_Interlocked_operand_ __int64 volatile* Destination, __int64 ExchangeHigh, __int64 ExchangeLow, __int64* ComparandResult)
{
    __int64 xchg[2] = { ExchangeLow, ExchangeHigh };
    return __sync_bool_compare_and_swap((__uint128_t*)Destination, *((__uint128_t*)ComparandResult), *((__uint128_t*)xchg));
}
#endif

#ifdef __i386__
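/* NOTE: as on Visual C++, the 64-bit update is not atomic as a whole; only the
   individual additions are locked, which is good enough for statistics */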
__INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * Addend, long Value)
{
	__asm__
	(
		"lock; addl %[Value], %[Lo32];"
		"jae LABEL%=;"
		"lock; adcl $0, %[Hi32];"
		"LABEL%=:;" :
		[Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1)) :
		[Value] "ir" (Value) :
		"memory"
	);

	return Value;
}
#endif /* __i386__ */

#if !HAS_BUILTIN(_interlockedbittestandreset)
__INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, long b)
{
	unsigned char retval;
	__asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#endif

#if defined(__x86_64__) && !HAS_BUILTIN(_interlockedbittestandreset64)
__INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, long long b)
{
	unsigned char retval;
	__asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}

#endif

#if !HAS_BUILTIN(_interlockedbittestandset)
__INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, long b)
{
	unsigned char retval;
	__asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#endif

#if defined(__x86_64__) && !HAS_BUILTIN(_interlockedbittestandset64)
__INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, long long b)
{
	unsigned char retval;
	__asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
	return retval;
}
#endif

/*** String operations ***/

#if !HAS_BUILTIN(__stosb)
/* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
__INTRIN_INLINE void __stosb(unsigned char * Dest, unsigned char Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosb" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}
#endif

__INTRIN_INLINE void __stosw(unsigned short * Dest, unsigned short Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosw" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __stosd(unsigned long * Dest, unsigned long Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosl" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}

#ifdef __x86_64__
__INTRIN_INLINE void __stosq(unsigned long long * Dest, unsigned long long Data, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; stosq" :
		[Dest] "=D" (Dest), [Count] "=c" (Count) :
		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
	);
}
#endif

__INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsb" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsw" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}

__INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsl" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}

#ifdef __x86_64__
__INTRIN_INLINE void __movsq(unsigned long long * Destination, const unsigned long long * Source, size_t Count)
{
	__asm__ __volatile__
	(
		"rep; movsq" :
		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
	);
}
#endif

#if defined(__x86_64__)

/*** GS segment addressing ***/

__INTRIN_INLINE void __writegsbyte(unsigned long Offset, unsigned char Data)
{
	__asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsword(unsigned long Offset, unsigned short Data)
{
	__asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsdword(unsigned long Offset, unsigned long Data)
{
	__asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writegsqword(unsigned long Offset, unsigned long long Data)
{
	__asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

#if !HAS_BUILTIN(__readgsbyte)
__INTRIN_INLINE unsigned char __readgsbyte(unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
#endif

#if !HAS_BUILTIN(__readgsword)
__INTRIN_INLINE unsigned short __readgsword(unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
#endif

#if !HAS_BUILTIN(__readgsdword)
__INTRIN_INLINE unsigned long __readgsdword(unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
#endif

#if !HAS_BUILTIN(__readgsqword)
__INTRIN_INLINE unsigned long long __readgsqword(unsigned long Offset)
{
	unsigned long long value;
	__asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
#endif

__INTRIN_INLINE void __incgsbyte(unsigned long Offset)
{
	__asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsword(unsigned long Offset)
{
	__asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsdword(unsigned long Offset)
{
	__asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incgsqword(unsigned long Offset)
{
	__asm__ __volatile__("incq %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __addgsbyte(unsigned long Offset, unsigned char Data)
{
	__asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsword(unsigned long Offset, unsigned short Data)
{
	__asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsdword(unsigned long Offset, unsigned long Data)
{
	__asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __addgsqword(unsigned long Offset, unsigned long long Data)
{
	__asm__ __volatile__("addq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

#else /* defined(__x86_64__) */

/*** FS segment addressing ***/

__INTRIN_INLINE void __writefsbyte(unsigned long Offset, unsigned char Data)
{
	__asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __writefsword(unsigned long Offset, unsigned short Data)
{
	__asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

__INTRIN_INLINE void __writefsdword(unsigned long Offset, unsigned long Data)
{
	__asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
}

#if !HAS_BUILTIN(__readfsbyte)
__INTRIN_INLINE unsigned char __readfsbyte(unsigned long Offset)
{
	unsigned char value;
	__asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
	return value;
}
#endif

#if !HAS_BUILTIN(__readfsword)
__INTRIN_INLINE unsigned short __readfsword(unsigned long Offset)
{
	unsigned short value;
	__asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
#endif

#if !HAS_BUILTIN(__readfsdword)
__INTRIN_INLINE unsigned long __readfsdword(unsigned long Offset)
{
	unsigned long value;
	__asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
	return value;
}
#endif

__INTRIN_INLINE void __incfsbyte(unsigned long Offset)
{
	__asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsword(unsigned long Offset)
{
	__asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

__INTRIN_INLINE void __incfsdword(unsigned long Offset)
{
	__asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
}

/* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
__INTRIN_INLINE void __addfsbyte(unsigned long Offset, unsigned char Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __addfsword(unsigned long Offset, unsigned short Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

__INTRIN_INLINE void __addfsdword(unsigned long Offset, unsigned long Data)
{
	if(!__builtin_constant_p(Offset))
		__asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
	else
		__asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
}

#endif /* defined(__x86_64__) */


/*** Bit manipulation ***/

#if !HAS_BUILTIN(_BitScanForward)
__INTRIN_INLINE unsigned char _BitScanForward(unsigned long * Index, unsigned long Mask)
{
	__asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}
#endif

#if !HAS_BUILTIN(_BitScanReverse)
__INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * Index, unsigned long Mask)
{
	__asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
	return Mask ? 1 : 0;
}
#endif

#if !HAS_BUILTIN(_bittest)
/* NOTE: again, the bizarre implementation follows Visual C++ */
__INTRIN_INLINE unsigned char _bittest(const long * a, long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
	else
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));

	return retval;
}
#endif

#ifdef __x86_64__
#if !HAS_BUILTIN(_BitScanForward64)
__INTRIN_INLINE unsigned char _BitScanForward64(unsigned long * Index, unsigned long long Mask)
{
	unsigned long long Index64;
	__asm__("bsfq %[Mask], %[Index]" : [Index] "=r" (Index64) : [Mask] "mr" (Mask));
	*Index = Index64;
	return Mask ? 1 : 0;
}
#endif

#if !HAS_BUILTIN(_BitScanReverse64)
__INTRIN_INLINE unsigned char _BitScanReverse64(unsigned long * Index, unsigned long long Mask)
{
	unsigned long long Index64;
	__asm__("bsrq %[Mask], %[Index]" : [Index] "=r" (Index64) : [Mask] "mr" (Mask));
	*Index = Index64;
	return Mask ? 1 : 0;
}
#endif

#if !HAS_BUILTIN(_bittest64)
__INTRIN_INLINE unsigned char _bittest64(const long long * a, long long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
	else
		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));

	return retval;
}
#endif
#endif

#if !HAS_BUILTIN(_bittestandcomplement)
__INTRIN_INLINE unsigned char _bittestandcomplement(long * a, long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}
#endif

#if !HAS_BUILTIN(_bittestandreset)
__INTRIN_INLINE unsigned char _bittestandreset(long * a, long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}
#endif

#if !HAS_BUILTIN(_bittestandset)
__INTRIN_INLINE unsigned char _bittestandset(long * a, long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
	else
		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}
#endif

#ifdef __x86_64__
#if !HAS_BUILTIN(_bittestandset64)
__INTRIN_INLINE unsigned char _bittestandset64(long long * a, long long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btsq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
	else
		__asm__("btsq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}
#endif

#if !HAS_BUILTIN(_bittestandreset64)
__INTRIN_INLINE unsigned char _bittestandreset64(long long * a, long long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btrq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
	else
		__asm__("btrq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}
#endif

#if !HAS_BUILTIN(_bittestandcomplement64)
__INTRIN_INLINE unsigned char _bittestandcomplement64(long long * a, long long b)
{
	unsigned char retval;

	if(__builtin_constant_p(b))
		__asm__("btcq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
	else
		__asm__("btcq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));

	return retval;
}
#endif
#endif /* __x86_64__ */

#if !HAS_BUILTIN(_rotl8)
__INTRIN_INLINE unsigned char __cdecl _rotl8(unsigned char value, unsigned char shift)
{
	unsigned char retval;
	__asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
#endif

#if !HAS_BUILTIN(_rotl16)
__INTRIN_INLINE unsigned short __cdecl _rotl16(unsigned short value, unsigned char shift)
{
	unsigned short retval;
	__asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
#endif

#if !HAS_BUILTIN(_rotl)
__INTRIN_INLINE unsigned int __cdecl _rotl(unsigned int value, int shift)
{
	unsigned int retval;
	__asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
#endif

#if !HAS_BUILTIN(_rotl64)
#ifdef __x86_64__
__INTRIN_INLINE unsigned long long _rotl64(unsigned long long value, int shift)
{
	unsigned long long retval;
	__asm__("rolq %b[shift], %q[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
#else /* __x86_64__ */
__INTRIN_INLINE unsigned long long __cdecl _rotl64(unsigned long long value, int shift)
{
    /* FIXME: this is probably not optimal */
    return (value << shift) | (value >> (64 - shift));
}
#endif /* __x86_64__ */
#endif /* !HAS_BUILTIN(_rotl64) */

#if !HAS_BUILTIN(_rotr)
__INTRIN_INLINE unsigned int __cdecl _rotr(unsigned int value, int shift)
{
	unsigned int retval;
	__asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
#endif

#if !HAS_BUILTIN(_rotr8)
__INTRIN_INLINE unsigned char __cdecl _rotr8(unsigned char value, unsigned char shift)
{
	unsigned char retval;
	__asm__("rorb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
#endif

#if !HAS_BUILTIN(_rotr16)
__INTRIN_INLINE unsigned short __cdecl _rotr16(unsigned short value, unsigned char shift)
{
	unsigned short retval;
	__asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
#endif

#if !HAS_BUILTIN(_rotr64)
#ifdef __x86_64__
__INTRIN_INLINE unsigned long long _rotr64(unsigned long long value, int shift)
{
	unsigned long long retval;
	__asm__("rorq %b[shift], %q[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
#else /* __x86_64__ */
__INTRIN_INLINE unsigned long long __cdecl _rotr64(unsigned long long value, int shift)
{
    /* FIXME: this is probably not optimal */
    return (value >> shift) | (value << (64 - shift));
}
#endif /* __x86_64__ */
#endif /* !HAS_BUILTIN(_rotr64) */

#if !HAS_BUILTIN(_lrotl)
__INTRIN_INLINE unsigned long __cdecl _lrotl(unsigned long value, int shift)
{
	unsigned long retval;
	__asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
#endif

#if !HAS_BUILTIN(_lrotr)
__INTRIN_INLINE unsigned long __cdecl _lrotr(unsigned long value, int shift)
{
	unsigned long retval;
	__asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
	return retval;
}
#endif

#if defined __x86_64__
#if defined(__clang__) && defined(_MSC_VER) // stupid hack because clang is broken
static inline __attribute__((__always_inline__))
#else
__INTRIN_INLINE
#endif
unsigned long long __ll_lshift(unsigned long long Mask, int Bit)
{
    unsigned long long retval;
    unsigned char shift = Bit & 0x3F;

    __asm__
    (
        "shlq %[shift], %[Mask]" : "=r"(retval) : [Mask] "0"(Mask), [shift] "c"(shift)
    );

    return retval;
}

__INTRIN_INLINE long long __ll_rshift(long long Mask, int Bit)
{
    long long retval;
    unsigned char shift = Bit & 0x3F;

    __asm__
    (
        "sarq %[shift], %[Mask]" : "=r"(retval) : [Mask] "0"(Mask), [shift] "c"(shift)
    );

    return retval;
}

__INTRIN_INLINE unsigned long long __ull_rshift(unsigned long long Mask, int Bit)
{
    long long retval;
    unsigned char shift = Bit & 0x3F;

    __asm__
    (
        "shrq %[shift], %[Mask]" : "=r"(retval) : [Mask] "0"(Mask), [shift] "c"(shift)
    );

    return retval;
}
#else
/*
	NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
	constraint (edx:eax) for the Mask argument, because it's the only way GCC
	can pass 64-bit operands around - passing the two 32 bit parts separately
	just confuses it. Also we declare Bit as an int and then truncate it to
	match Visual C++ behavior
*/
#if defined(__clang__) && defined(_MSC_VER) // stupid hack because clang is broken
static inline __attribute__((__always_inline__))
#else
__INTRIN_INLINE
#endif
unsigned long long __ll_lshift(unsigned long long Mask, int Bit)
{
	unsigned long long retval = Mask;

	__asm__
	(
		"shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}

__INTRIN_INLINE long long __ll_rshift(long long Mask, int Bit)
{
	long long retval = Mask;

	__asm__
	(
		"shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}

__INTRIN_INLINE unsigned long long __ull_rshift(unsigned long long Mask, int Bit)
{
	unsigned long long retval = Mask;

	__asm__
	(
		"shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
		"+A" (retval) :
		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
	);

	return retval;
}
#endif

__INTRIN_INLINE unsigned short __cdecl _byteswap_ushort(unsigned short value)
{
	unsigned short retval;
	__asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
	return retval;
}

__INTRIN_INLINE unsigned long __cdecl _byteswap_ulong(unsigned long value)
{
	unsigned long retval;
	__asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
	return retval;
}

#ifdef __x86_64__
__INTRIN_INLINE unsigned long long _byteswap_uint64(unsigned long long value)
{
	unsigned long long retval;
	__asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
	return retval;
}
#else
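/* Byte-swap each 32-bit half, then exchange the halves by writing each bswap
   result into the opposite member of the union */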
__INTRIN_INLINE unsigned long long __cdecl _byteswap_uint64(unsigned long long value)
{
	union {
		unsigned long long int64part;
		struct {
			unsigned long lowpart;
			unsigned long hipart;
		};
	} retval;
	retval.int64part = value;
	__asm__("bswapl %[lowpart]\n"
	        "bswapl %[hipart]\n"
	        : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
	return retval.int64part;
}
#endif

#if !HAS_BUILTIN(__lzcnt)
__INTRIN_INLINE unsigned int __lzcnt(unsigned int value)
{
	return __builtin_clz(value);
}
#endif

#if !HAS_BUILTIN(__lzcnt16)
__INTRIN_INLINE unsigned short __lzcnt16(unsigned short value)
{
	return __builtin_clz(value);
}
#endif

#if !HAS_BUILTIN(__popcnt)
__INTRIN_INLINE unsigned int __popcnt(unsigned int value)
{
	return __builtin_popcount(value);
}
#endif

#if !HAS_BUILTIN(__popcnt16)
__INTRIN_INLINE unsigned short __popcnt16(unsigned short value)
{
	return __builtin_popcount(value);
}
#endif

#ifdef __x86_64__
#if !HAS_BUILTIN(__lzcnt64)
__INTRIN_INLINE unsigned long long __lzcnt64(unsigned long long value)
{
	return __builtin_clzll(value);
}
#endif

#if !HAS_BUILTIN(__popcnt64)
__INTRIN_INLINE unsigned long long __popcnt64(unsigned long long value)
{
	return __builtin_popcountll(value);
}
#endif
#endif
1486 
1487 /*** 64-bit math ***/
1488 
1489 #if !HAS_BUILTIN(__emul)
__emul(int a,int b)1490 __INTRIN_INLINE long long __emul(int a, int b)
1491 {
1492 	long long retval;
1493 	__asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1494 	return retval;
1495 }
1496 #endif
1497 
1498 #if !HAS_BUILTIN(__emulu)
__emulu(unsigned int a,unsigned int b)1499 __INTRIN_INLINE unsigned long long __emulu(unsigned int a, unsigned int b)
1500 {
1501 	unsigned long long retval;
1502 	__asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1503 	return retval;
1504 }
1505 #endif
1506 
_abs64(long long value)1507 __INTRIN_INLINE long long __cdecl _abs64(long long value)
1508 {
1509     return (value >= 0) ? value : -value;
1510 }
1511 
1512 #ifdef __x86_64__
1513 #if !HAS_BUILTIN(__mulh)
__mulh(long long a,long long b)1514 __INTRIN_INLINE long long __mulh(long long a, long long b)
1515 {
1516 	long long retval;
1517 	__asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1518 	return retval;
1519 }
1520 #endif
1521 
1522 #if !HAS_BUILTIN(__umulh)
__umulh(unsigned long long a,unsigned long long b)1523 __INTRIN_INLINE unsigned long long __umulh(unsigned long long a, unsigned long long b)
1524 {
1525 	unsigned long long retval;
1526 	__asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1527 	return retval;
1528 }
1529 #endif
1530 #endif
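
/*
	Usage sketch (illustrative only, x86-64 builds): __umulh yields the upper
	64 bits of the 128-bit product, the usual building block for scaling by a
	0.64 fixed-point fraction without losing precision.

		unsigned long long fraction = 0x4CCCCCCCCCCCCCCCULL;    // floor(0.3 * 2^64)
		unsigned long long value    = 1000;
		unsigned long long scaled   = __umulh(value, fraction); // 299 (0.3 * 1000, truncated)
*/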
1531 
1532 /*** Port I/O ***/
1533 
1534 __INTRIN_INLINE unsigned char __inbyte(unsigned short Port)
1535 {
1536 	unsigned char byte;
1537 	__asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
1538 	return byte;
1539 }
1540 
1541 __INTRIN_INLINE unsigned short __inword(unsigned short Port)
1542 {
1543 	unsigned short word;
1544 	__asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
1545 	return word;
1546 }
1547 
1548 __INTRIN_INLINE unsigned long __indword(unsigned short Port)
1549 {
1550 	unsigned long dword;
1551 	__asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
1552 	return dword;
1553 }
1554 
1555 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1556 {
1557 	__asm__ __volatile__
1558 	(
1559 		"rep; insb" :
1560 		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1561 		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1562 		"memory"
1563 	);
1564 }
1565 
1566 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1567 {
1568 	__asm__ __volatile__
1569 	(
1570 		"rep; insw" :
1571 		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1572 		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1573 		"memory"
1574 	);
1575 }
1576 
1577 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1578 {
1579 	__asm__ __volatile__
1580 	(
1581 		"rep; insl" :
1582 		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1583 		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1584 		"memory"
1585 	);
1586 }
1587 
1588 __INTRIN_INLINE void __outbyte(unsigned short Port, unsigned char Data)
1589 {
1590 	__asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1591 }
1592 
1593 __INTRIN_INLINE void __outword(unsigned short Port, unsigned short Data)
1594 {
1595 	__asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1596 }
1597 
1598 __INTRIN_INLINE void __outdword(unsigned short Port, unsigned long Data)
1599 {
1600 	__asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1601 }
1602 
1603 __INTRIN_INLINE void __outbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1604 {
1605 	__asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1606 }
1607 
1608 __INTRIN_INLINE void __outwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1609 {
1610 	__asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1611 }
1612 
1613 __INTRIN_INLINE void __outdwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1614 {
1615 	__asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1616 }
1617 
1618 __INTRIN_INLINE int __cdecl _inp(unsigned short Port)
1619 {
1620 	return __inbyte(Port);
1621 }
1622 
1623 __INTRIN_INLINE unsigned short __cdecl _inpw(unsigned short Port)
1624 {
1625 	return __inword(Port);
1626 }
1627 
1628 __INTRIN_INLINE unsigned long __cdecl _inpd(unsigned short Port)
1629 {
1630 	return __indword(Port);
1631 }
1632 
1633 __INTRIN_INLINE int __cdecl _outp(unsigned short Port, int databyte)
1634 {
1635 	__outbyte(Port, (unsigned char)databyte);
1636 	return databyte;
1637 }
1638 
1639 __INTRIN_INLINE unsigned short __cdecl _outpw(unsigned short Port, unsigned short dataword)
1640 {
1641 	__outword(Port, dataword);
1642 	return dataword;
1643 }
1644 
1645 __INTRIN_INLINE unsigned long __cdecl _outpd(unsigned short Port, unsigned long dataword)
1646 {
1647 	__outdword(Port, dataword);
1648 	return dataword;
1649 }
1650 
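
/*
	Usage sketch (illustrative only, kernel/driver context): reading the
	seconds register of the CMOS RTC through the classic index/data port pair
	0x70/0x71.

		__outbyte(0x70, 0x00);                  // select CMOS register 0 (seconds)
		unsigned char seconds = __inbyte(0x71); // read it back (BCD on most firmware)
*/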
1651 
1652 /*** System information ***/
1653 
1654 #if !HAS_BUILTIN(__cpuid)
1655 __INTRIN_INLINE void __cpuid(int CPUInfo[4], int InfoType)
1656 {
1657 	__asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
1658 }
1659 #endif
1660 
1661 #if !HAS_BUILTIN(__cpuidex)
1662 __INTRIN_INLINE void __cpuidex(int CPUInfo[4], int InfoType, int ECXValue)
1663 {
1664 	__asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType), "c" (ECXValue));
1665 }
1666 #endif
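
/*
	Usage sketch (illustrative only): CPUID leaf 0 returns the vendor string
	split across EBX, EDX and ECX (in that order); CPUInfo[] is laid out as
	EAX, EBX, ECX, EDX, matching Visual C++.

		int regs[4];
		char vendor[13];
		__cpuid(regs, 0);
		memcpy(&vendor[0], &regs[1], 4); // EBX
		memcpy(&vendor[4], &regs[3], 4); // EDX
		memcpy(&vendor[8], &regs[2], 4); // ECX
		vendor[12] = '\0';               // e.g. "GenuineIntel" or "AuthenticAMD"
*/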
1667 
1668 #if !HAS_BUILTIN(__rdtsc)
1669 __INTRIN_INLINE unsigned long long __rdtsc(void)
1670 {
1671 #ifdef __x86_64__
1672 	unsigned long long low, high;
1673 	__asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
1674 	return low | (high << 32);
1675 #else
1676 	unsigned long long retval;
1677 	__asm__ __volatile__("rdtsc" : "=A"(retval));
1678 	return retval;
1679 #endif
1680 }
1681 #endif /* !HAS_BUILTIN(__rdtsc) */
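
/*
	Usage sketch (illustrative only): the difference between two __rdtsc()
	reads gives an elapsed time-stamp count. The TSC rate is model specific
	and RDTSC is not serializing; do_something() is a placeholder workload.

		unsigned long long start = __rdtsc();
		do_something();
		unsigned long long elapsed = __rdtsc() - start;
*/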
1682 
1683 __INTRIN_INLINE void __writeeflags(uintptr_t Value)
1684 {
1685 	__asm__ __volatile__("push %0\n popf" : : "rim"(Value));
1686 }
1687 
1688 __INTRIN_INLINE uintptr_t __readeflags(void)
1689 {
1690 	uintptr_t retval;
1691 	__asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
1692 	return retval;
1693 }
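
/*
	Usage sketch (illustrative only): bit 9 of EFLAGS is the interrupt flag,
	so a caller can test whether interrupts are currently enabled.

		int interrupts_enabled = (__readeflags() & (1 << 9)) != 0;
*/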
1694 
1695 /*** Interrupts ***/
1696 
1697 #if !HAS_BUILTIN(__debugbreak)
1698 __INTRIN_INLINE void __cdecl __debugbreak(void)
1699 {
1700 	__asm__("int $3");
1701 }
1702 #endif
1703 
1704 #if !HAS_BUILTIN(__ud2)
1705 __INTRIN_INLINE void __ud2(void)
1706 {
1707 	__asm__("ud2");
1708 }
1709 #endif
1710 
1711 #if !HAS_BUILTIN(__int2c)
1712 __INTRIN_INLINE void __int2c(void)
1713 {
1714 	__asm__("int $0x2c");
1715 }
1716 #endif
1717 
1718 __INTRIN_INLINE void __cdecl _disable(void)
1719 {
1720 	__asm__("cli" : : : "memory");
1721 }
1722 
1723 __INTRIN_INLINE void __cdecl _enable(void)
1724 {
1725 	__asm__("sti" : : : "memory");
1726 }
1727 
1728 __INTRIN_INLINE void __halt(void)
1729 {
1730 	__asm__("hlt" : : : "memory");
1731 }
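
/*
	Usage sketch (illustrative only): a common pattern is to save EFLAGS,
	disable interrupts around a short critical section, and then restore the
	previous state rather than unconditionally re-enabling.

		uintptr_t flags = __readeflags();
		_disable();
		// ... touch per-processor state ...
		if (flags & (1 << 9)) // interrupts were enabled on entry
			_enable();
*/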
1732 
1733 #if !HAS_BUILTIN(__fastfail)
1734 __declspec(noreturn)
1735 __INTRIN_INLINE void __fastfail(unsigned int Code)
1736 {
1737 	__asm__("int $0x29" : : "c"(Code) : "memory");
1738 	__builtin_unreachable();
1739 }
1740 #endif
1741 
1742 /*** Protected memory management ***/
1743 
1744 #ifdef __x86_64__
1745 
1746 __INTRIN_INLINE void __writecr0(unsigned long long Data)
1747 {
1748 	__asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1749 }
1750 
1751 __INTRIN_INLINE void __writecr3(unsigned long long Data)
1752 {
1753 	__asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1754 }
1755 
1756 __INTRIN_INLINE void __writecr4(unsigned long long Data)
1757 {
1758 	__asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1759 }
1760 
1761 __INTRIN_INLINE void __writecr8(unsigned long long Data)
1762 {
1763 	__asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1764 }
1765 
1766 __INTRIN_INLINE unsigned long long __readcr0(void)
1767 {
1768 	unsigned long long value;
1769 	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1770 	return value;
1771 }
1772 
1773 __INTRIN_INLINE unsigned long long __readcr2(void)
1774 {
1775 	unsigned long long value;
1776 	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1777 	return value;
1778 }
1779 
1780 __INTRIN_INLINE unsigned long long __readcr3(void)
1781 {
1782 	unsigned long long value;
1783 	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1784 	return value;
1785 }
1786 
1787 __INTRIN_INLINE unsigned long long __readcr4(void)
1788 {
1789 	unsigned long long value;
1790 	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1791 	return value;
1792 }
1793 
1794 __INTRIN_INLINE unsigned long long __readcr8(void)
1795 {
1796 	unsigned long long value;
1797 	__asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
1798 	return value;
1799 }
1800 
1801 #else /* __x86_64__ */
1802 
1803 __INTRIN_INLINE void __writecr0(unsigned int Data)
1804 {
1805 	__asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1806 }
1807 
1808 __INTRIN_INLINE void __writecr3(unsigned int Data)
1809 {
1810 	__asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1811 }
1812 
1813 __INTRIN_INLINE void __writecr4(unsigned int Data)
1814 {
1815 	__asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1816 }
1817 
1818 __INTRIN_INLINE unsigned long __readcr0(void)
1819 {
1820 	unsigned long value;
1821 	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1822 	return value;
1823 }
1824 
1825 __INTRIN_INLINE unsigned long __readcr2(void)
1826 {
1827 	unsigned long value;
1828 	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1829 	return value;
1830 }
1831 
1832 __INTRIN_INLINE unsigned long __readcr3(void)
1833 {
1834 	unsigned long value;
1835 	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1836 	return value;
1837 }
1838 
1839 __INTRIN_INLINE unsigned long __readcr4(void)
1840 {
1841 	unsigned long value;
1842 	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1843 	return value;
1844 }
1845 
1846 #endif /* __x86_64__ */
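
/*
	Usage sketch (illustrative only, kernel context): read-modify-write of a
	control register, here clearing and restoring CR0.WP (bit 16, write
	protect) around a write to read-only memory. The operand type is
	unsigned long on x86 and unsigned long long on x86-64, as declared above.

		__writecr0(__readcr0() & ~0x10000); // clear WP
		// ... patch a read-only page ...
		__writecr0(__readcr0() | 0x10000);  // set WP again
*/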
1847 
1848 #ifdef __x86_64__
1849 
1850 __INTRIN_INLINE unsigned long long __readdr(unsigned int reg)
1851 {
1852 	unsigned long long value;
1853 	switch (reg)
1854 	{
1855 		case 0:
1856 			__asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
1857 			break;
1858 		case 1:
1859 			__asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
1860 			break;
1861 		case 2:
1862 			__asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
1863 			break;
1864 		case 3:
1865 			__asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
1866 			break;
1867 		case 4:
1868 			__asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
1869 			break;
1870 		case 5:
1871 			__asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
1872 			break;
1873 		case 6:
1874 			__asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
1875 			break;
1876 		case 7:
1877 			__asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
1878 			break;
1879 	}
1880 	return value;
1881 }
1882 
1883 __INTRIN_INLINE void __writedr(unsigned reg, unsigned long long value)
1884 {
1885 	switch (reg)
1886 	{
1887 		case 0:
1888 			__asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
1889 			break;
1890 		case 1:
1891 			__asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
1892 			break;
1893 		case 2:
1894 			__asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
1895 			break;
1896 		case 3:
1897 			__asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
1898 			break;
1899 		case 4:
1900 			__asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
1901 			break;
1902 		case 5:
1903 			__asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
1904 			break;
1905 		case 6:
1906 			__asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
1907 			break;
1908 		case 7:
1909 			__asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
1910 			break;
1911 	}
1912 }
1913 
1914 #else /* __x86_64__ */
1915 
1916 __INTRIN_INLINE unsigned int __readdr(unsigned int reg)
1917 {
1918 	unsigned int value;
1919 	switch (reg)
1920 	{
1921 		case 0:
1922 			__asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
1923 			break;
1924 		case 1:
1925 			__asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
1926 			break;
1927 		case 2:
1928 			__asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
1929 			break;
1930 		case 3:
1931 			__asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
1932 			break;
1933 		case 4:
1934 			__asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
1935 			break;
1936 		case 5:
1937 			__asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
1938 			break;
1939 		case 6:
1940 			__asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
1941 			break;
1942 		case 7:
1943 			__asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
1944 			break;
1945 	}
1946 	return value;
1947 }
1948 
1949 __INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
1950 {
1951 	switch (reg)
1952 	{
1953 		case 0:
1954 			__asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
1955 			break;
1956 		case 1:
1957 			__asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
1958 			break;
1959 		case 2:
1960 			__asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
1961 			break;
1962 		case 3:
1963 			__asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
1964 			break;
1965 		case 4:
1966 			__asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
1967 			break;
1968 		case 5:
1969 			__asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
1970 			break;
1971 		case 6:
1972 			__asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
1973 			break;
1974 		case 7:
1975 			__asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
1976 			break;
1977 	}
1978 }
1979 
1980 #endif /* __x86_64__ */
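
/*
	Usage sketch (illustrative only): DR6 holds the debug status after a #DB
	exception; its low four bits report which of DR0-DR3 triggered.

		int hit_dr0 = (__readdr(6) & 1) != 0; // breakpoint programmed in DR0 fired
*/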
1981 
1982 __INTRIN_INLINE void __invlpg(void *Address)
1983 {
1984 	__asm__ __volatile__ ("invlpg (%[Address])" : : [Address] "b" (Address) : "memory");
1985 }
1986 
1987 
1988 /*** System operations ***/
1989 #if defined(__clang__) && defined(_MSC_VER) // workaround: clang in MSVC mode (clang-cl) chokes on __INTRIN_INLINE for this function, so use a plain always-inline definition
1990 static inline __attribute__((__always_inline__))
1991 #else
1992 __INTRIN_INLINE
1993 #endif
1994 unsigned long long __readmsr(unsigned long reg)
1995 {
1996 #ifdef __x86_64__
1997 	unsigned long low, high;
1998 	__asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
1999 	return ((unsigned long long)high << 32) | low;
2000 #else
2001 	unsigned long long retval;
2002 	__asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
2003 	return retval;
2004 #endif
2005 }
2006 
2007 #if defined(__clang__) && defined(_MSC_VER) // workaround: clang in MSVC mode (clang-cl) chokes on __INTRIN_INLINE for this function, so use a plain always-inline definition
2008 static inline __attribute__((__always_inline__))
2009 #else
2010 __INTRIN_INLINE
2011 #endif
2012 void __writemsr(unsigned long Register, unsigned long long Value)
2013 {
2014 #ifdef __x86_64__
2015 	__asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
2016 #else
2017 	__asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
2018 #endif
2019 }
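
/*
	Usage sketch (illustrative only, kernel context): IA32_APIC_BASE
	(MSR 0x1B) holds the local APIC base address and the BSP flag.

		unsigned long long apic_base = __readmsr(0x1B);
		int is_bsp = (apic_base & (1ULL << 8)) != 0;          // bit 8: bootstrap processor
		unsigned long long base = apic_base & 0xFFFFF000ULL;  // page-aligned base (low bits shown)
*/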
2020 
__INTRIN_INLINE unsigned long long __readpmc(unsigned long counter)
{
#ifdef __x86_64__
	unsigned long low, high; /* "=A" cannot name the edx:eax pair in 64-bit mode */
	__asm__ __volatile__("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
	return ((unsigned long long)high << 32) | low;
#else
	unsigned long long retval;
	__asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
	return retval;
#endif
}
2027 
2028 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
2029 __INTRIN_INLINE unsigned long __segmentlimit(unsigned long a)
2030 {
2031 	unsigned long retval;
2032 	__asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
2033 	return retval;
2034 }
2035 
2036 __INTRIN_INLINE void __wbinvd(void)
2037 {
2038 	__asm__ __volatile__("wbinvd" : : : "memory");
2039 }
2040 
2041 __INTRIN_INLINE void __lidt(void *Source)
2042 {
2043 	__asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
2044 }
2045 
2046 __INTRIN_INLINE void __sidt(void *Destination)
2047 {
2048 	__asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
2049 }
2050 
2051 __INTRIN_INLINE void _sgdt(void *Destination)
2052 {
2053 	__asm__ __volatile__("sgdt %0" : : "m"(*(short*)Destination) : "memory");
2054 }
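
/*
	Usage sketch (illustrative only): __sidt/_sgdt store a pseudo-descriptor
	(a 16-bit limit followed by the base address), so the destination should
	be a packed limit/base pair. The struct below is a local illustration,
	not something this header defines.

		#pragma pack(push, 1)
		struct desc_ptr { unsigned short limit; uintptr_t base; };
		#pragma pack(pop)

		struct desc_ptr idtr;
		__sidt(&idtr); // idtr.limit / idtr.base now describe the IDT
*/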
2055 
2056 /*** Misc operations ***/
2057 
2058 #if !HAS_BUILTIN(_mm_pause)
2059 __INTRIN_INLINE void _mm_pause(void)
2060 {
2061 	__asm__ __volatile__("pause" : : : "memory");
2062 }
2063 #endif
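
/*
	Usage sketch (illustrative only): _mm_pause belongs in spin-wait loops,
	where it saves power and yields pipeline resources to the sibling
	hyper-thread while polling; 'flag' and get_flag() are placeholders for
	some shared variable set by another processor.

		volatile long *flag = get_flag(); // hypothetical
		while (*flag == 0)
			_mm_pause();
*/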
2064 
2065 __INTRIN_INLINE void __nop(void)
2066 {
2067 	__asm__ __volatile__("nop");
2068 }
2069 
2070 #ifdef __cplusplus
2071 }
2072 #endif
2073 
2074 #endif /* KJK_INTRIN_X86_H_ */
2075 
2076 /* EOF */
2077