1 /*
2 	Compatibility <intrin_x86.h> header for GCC -- GCC equivalents of intrinsic
3 	Microsoft Visual C++ functions. Originally developed for the ReactOS
4 	(<https://reactos.org/>) and TinyKrnl (<http://www.tinykrnl.org/>)
5 	projects.
6 
7 	Copyright (c) 2006 KJK::Hyperion <hackbunny@reactos.com>
8 
9 	Permission is hereby granted, free of charge, to any person obtaining a
10 	copy of this software and associated documentation files (the "Software"),
11 	to deal in the Software without restriction, including without limitation
12 	the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 	and/or sell copies of the Software, and to permit persons to whom the
14 	Software is furnished to do so, subject to the following conditions:
15 
16 	The above copyright notice and this permission notice shall be included in
17 	all copies or substantial portions of the Software.
18 
19 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 	FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 	DEALINGS IN THE SOFTWARE.
26 */
27 
28 #ifndef KJK_INTRIN_X86_H_
29 #define KJK_INTRIN_X86_H_
30 
31 /*
32 	FIXME: review all "memory" clobbers, add/remove to match Visual C++
33 	behavior: some "obvious" memory barriers are not present in the Visual C++
34 	implementation - e.g. __stosX; on the other hand, some memory barriers that
35 	*are* present could have been missed
36 */
37 
38 /*
39 	NOTE: this is a *compatibility* header. Some functions may look wrong at
40 	first, but they're only "as wrong" as they would be on Visual C++. Our
41 	priority is compatibility
42 
43 	NOTE: unlike most people who write inline asm for GCC, I didn't pull the
44 	constraints and the uses of __volatile__ out of my... hat. Do not touch
45 	them. I hate cargo cult programming
46 
47 	NOTE: be very careful with declaring "memory" clobbers. Some "obvious"
48 	barriers aren't there in Visual C++ (e.g. __stosX)
49 
50 	NOTE: review all intrinsics with a return value, add/remove __volatile__
51 	where necessary. If an intrinsic whose value is ignored generates a no-op
52 	under Visual C++, __volatile__ must be omitted; if it always generates code
53 	(for example, if it has side effects), __volatile__ must be specified. GCC
54 	will only optimize out non-volatile asm blocks with outputs, so input-only
55 	blocks are safe. Oddities such as the non-volatile 'rdmsr' are intentional
56 	and follow Visual C++ behavior
57 
	NOTE: on GCC versions newer than 4.1.0, please use the __sync_* built-ins
	for barriers and atomic operations. Test the version like this:
60 
61 	#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
62 		...
63 
64 	Pay attention to the type of barrier. Make it match with what Visual C++
65 	would use in the same case
66 */
67 
68 #ifdef __cplusplus
69 extern "C" {
70 #endif
71 
72 /*** memcopy must be memmove ***/
73 void* __cdecl memmove(void* dest, const void* source, size_t num);
74 __INTRIN_INLINE void* __cdecl memcpy(void* dest, const void* source, size_t num)
75 {
76     return memmove(dest, source, num);
77 }
78 
79 
80 /*** Stack frame juggling ***/
/* Return address of the current function, as reported by the compiler built-in */
#define _ReturnAddress() (__builtin_return_address(0))
/* Address of the slot one pointer above the frame address reported by the compiler built-in */
#define _AddressOfReturnAddress() (((void **)(__builtin_frame_address(0))) + 1)
83 /* TODO: __getcallerseflags but how??? */
84 
85 /*** Memory barriers ***/
86 
87 #if !HAS_BUILTIN(_ReadWriteBarrier)
88 __INTRIN_INLINE void _ReadWriteBarrier(void)
89 {
90 	__asm__ __volatile__("" : : : "memory");
91 }
92 #endif
93 
/* GCC only supports full barriers, so the read-only and write-only variants alias the full one */
#define _WriteBarrier _ReadWriteBarrier
#define _ReadBarrier _ReadWriteBarrier
97 
98 #if !HAS_BUILTIN(_mm_mfence)
99 __INTRIN_INLINE void _mm_mfence(void)
100 {
101 	__asm__ __volatile__("mfence" : : : "memory");
102 }
103 #endif
104 
105 #if !HAS_BUILTIN(_mm_lfence)
106 __INTRIN_INLINE void _mm_lfence(void)
107 {
108 	_ReadBarrier();
109 	__asm__ __volatile__("lfence");
110 	_ReadBarrier();
111 }
112 #endif
113 
114 #if defined(__x86_64__) && !HAS_BUILTIN(__faststorefence)
115 __INTRIN_INLINE void __faststorefence(void)
116 {
117 	long local;
118 	__asm__ __volatile__("lock; orl $0, %0;" : : "m"(local));
119 }
120 #endif
121 
122 
123 /*** Atomic operations ***/
124 
125 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
126 
127 #if !HAS_BUILTIN(_InterlockedCompareExchange8)
128 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, char Exchange, char Comperand)
129 {
130 	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
131 }
132 #endif
133 
134 #if !HAS_BUILTIN(_InterlockedCompareExchange16)
135 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, short Exchange, short Comperand)
136 {
137 	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
138 }
139 #endif
140 
141 #if !HAS_BUILTIN(_InterlockedCompareExchange)
142 __INTRIN_INLINE long __cdecl _InterlockedCompareExchange(volatile long * Destination, long Exchange, long Comperand)
143 {
144 	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
145 }
146 #endif
147 
148 #if !HAS_BUILTIN(_InterlockedCompareExchangePointer)
149 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * Exchange, void * Comperand)
150 {
151 	return (void *)__sync_val_compare_and_swap(Destination, Comperand, Exchange);
152 }
153 #endif
154 
155 #if !HAS_BUILTIN(_InterlockedExchange8)
156 __INTRIN_INLINE char _InterlockedExchange8(volatile char * Target, char Value)
157 {
158 	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
159 	__sync_synchronize();
160 	return __sync_lock_test_and_set(Target, Value);
161 }
162 #endif
163 
164 #if !HAS_BUILTIN(_InterlockedExchange16)
165 __INTRIN_INLINE short _InterlockedExchange16(volatile short * Target, short Value)
166 {
167 	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
168 	__sync_synchronize();
169 	return __sync_lock_test_and_set(Target, Value);
170 }
171 #endif
172 
173 #if !HAS_BUILTIN(_InterlockedExchange)
174 __INTRIN_INLINE long __cdecl _InterlockedExchange(volatile long * Target, long Value)
175 {
176 	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
177 	__sync_synchronize();
178 	return __sync_lock_test_and_set(Target, Value);
179 }
180 #endif
181 
182 #if !HAS_BUILTIN(_InterlockedExchangePointer)
183 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * Value)
184 {
185 	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
186 	__sync_synchronize();
187 	return (void *)__sync_lock_test_and_set(Target, Value);
188 }
189 #endif
190 
191 #if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedExchange64)
192 __INTRIN_INLINE long long _InterlockedExchange64(volatile long long * Target, long long Value)
193 {
194 	/* NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier */
195 	__sync_synchronize();
196 	return __sync_lock_test_and_set(Target, Value);
197 }
198 #endif
199 
200 #if !HAS_BUILTIN(_InterlockedExchangeAdd8)
201 __INTRIN_INLINE char _InterlockedExchangeAdd8(char volatile * Addend, char Value)
202 {
203 	return __sync_fetch_and_add(Addend, Value);
204 }
205 #endif
206 
207 #if !HAS_BUILTIN(_InterlockedExchangeAdd16)
208 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * Addend, short Value)
209 {
210 	return __sync_fetch_and_add(Addend, Value);
211 }
212 #endif
213 
214 #if !HAS_BUILTIN(_InterlockedExchangeAdd)
215 __INTRIN_INLINE long __cdecl _InterlockedExchangeAdd(volatile long * Addend, long Value)
216 {
217 	return __sync_fetch_and_add(Addend, Value);
218 }
219 #endif
220 
221 #if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedExchangeAdd64)
222 __INTRIN_INLINE long long _InterlockedExchangeAdd64(volatile long long * Addend, long long Value)
223 {
224 	return __sync_fetch_and_add(Addend, Value);
225 }
226 #endif
227 
228 #if !HAS_BUILTIN(_InterlockedAnd8)
229 __INTRIN_INLINE char _InterlockedAnd8(volatile char * value, char mask)
230 {
231 	return __sync_fetch_and_and(value, mask);
232 }
233 #endif
234 
235 #if !HAS_BUILTIN(_InterlockedAnd16)
236 __INTRIN_INLINE short _InterlockedAnd16(volatile short * value, short mask)
237 {
238 	return __sync_fetch_and_and(value, mask);
239 }
240 #endif
241 
242 #if !HAS_BUILTIN(_InterlockedAnd)
243 __INTRIN_INLINE long _InterlockedAnd(volatile long * value, long mask)
244 {
245 	return __sync_fetch_and_and(value, mask);
246 }
247 #endif
248 
249 #if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedAnd64)
250 __INTRIN_INLINE long long _InterlockedAnd64(volatile long long * value, long long mask)
251 {
252 	return __sync_fetch_and_and(value, mask);
253 }
254 #endif
255 
256 #if !HAS_BUILTIN(_InterlockedOr8)
257 __INTRIN_INLINE char _InterlockedOr8(volatile char * value, char mask)
258 {
259 	return __sync_fetch_and_or(value, mask);
260 }
261 #endif
262 
263 #if !HAS_BUILTIN(_InterlockedOr16)
264 __INTRIN_INLINE short _InterlockedOr16(volatile short * value, short mask)
265 {
266 	return __sync_fetch_and_or(value, mask);
267 }
268 #endif
269 
270 #if !HAS_BUILTIN(_InterlockedOr)
271 __INTRIN_INLINE long _InterlockedOr(volatile long * value, long mask)
272 {
273 	return __sync_fetch_and_or(value, mask);
274 }
275 #endif
276 
277 #if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedOr64)
278 __INTRIN_INLINE long long _InterlockedOr64(volatile long long * value, long long mask)
279 {
280 	return __sync_fetch_and_or(value, mask);
281 }
282 #endif
283 
284 #if !HAS_BUILTIN(_InterlockedXor8)
285 __INTRIN_INLINE char _InterlockedXor8(volatile char * value, char mask)
286 {
287 	return __sync_fetch_and_xor(value, mask);
288 }
289 #endif
290 
291 #if !HAS_BUILTIN(_InterlockedXor16)
292 __INTRIN_INLINE short _InterlockedXor16(volatile short * value, short mask)
293 {
294 	return __sync_fetch_and_xor(value, mask);
295 }
296 #endif
297 
298 #if !HAS_BUILTIN(_InterlockedXor)
299 __INTRIN_INLINE long _InterlockedXor(volatile long * value, long mask)
300 {
301 	return __sync_fetch_and_xor(value, mask);
302 }
303 #endif
304 
305 #if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedXor64)
306 __INTRIN_INLINE long long _InterlockedXor64(volatile long long * value, long long mask)
307 {
308 	return __sync_fetch_and_xor(value, mask);
309 }
310 #endif
311 
312 #if !HAS_BUILTIN(_InterlockedDecrement)
313 __INTRIN_INLINE long __cdecl _InterlockedDecrement(volatile long * lpAddend)
314 {
315 	return __sync_sub_and_fetch(lpAddend, 1);
316 }
317 #endif
318 
319 #if !HAS_BUILTIN(_InterlockedIncrement)
320 __INTRIN_INLINE long __cdecl _InterlockedIncrement(volatile long * lpAddend)
321 {
322 	return __sync_add_and_fetch(lpAddend, 1);
323 }
324 #endif
325 
326 #if !HAS_BUILTIN(_InterlockedDecrement16)
327 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * lpAddend)
328 {
329 	return __sync_sub_and_fetch(lpAddend, 1);
330 }
331 #endif
332 
333 #if !HAS_BUILTIN(_InterlockedIncrement16)
334 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * lpAddend)
335 {
336 	return __sync_add_and_fetch(lpAddend, 1);
337 }
338 #endif
339 
340 #if defined(__x86_64__)
341 #if !HAS_BUILTIN(_InterlockedDecrement64)
342 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * lpAddend)
343 {
344 	return __sync_sub_and_fetch(lpAddend, 1);
345 }
346 #endif
347 
348 #if !HAS_BUILTIN(_InterlockedIncrement64)
349 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * lpAddend)
350 {
351 	return __sync_add_and_fetch(lpAddend, 1);
352 }
353 #endif
354 #endif
355 
356 #else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
357 
358 #if !HAS_BUILTIN(_InterlockedCompareExchange8)
359 __INTRIN_INLINE char _InterlockedCompareExchange8(volatile char * Destination, char Exchange, char Comperand)
360 {
361 	char retval = Comperand;
362 	__asm__("lock; cmpxchgb %b[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
363 	return retval;
364 }
365 #endif
366 
367 #if !HAS_BUILTIN(_InterlockedCompareExchange16)
368 __INTRIN_INLINE short _InterlockedCompareExchange16(volatile short * Destination, short Exchange, short Comperand)
369 {
370 	short retval = Comperand;
371 	__asm__("lock; cmpxchgw %w[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
372 	return retval;
373 }
374 #endif
375 
376 #if !HAS_BUILTIN(_InterlockedCompareExchange)
377 __INTRIN_INLINE long _InterlockedCompareExchange(volatile long * Destination, long Exchange, long Comperand)
378 {
379 	long retval = Comperand;
380 	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "+a" (retval) : [Destination] "m" (*Destination), [Exchange] "q" (Exchange): "memory");
381 	return retval;
382 }
383 #endif
384 
385 #if !HAS_BUILTIN(_InterlockedCompareExchangePointer)
386 __INTRIN_INLINE void * _InterlockedCompareExchangePointer(void * volatile * Destination, void * Exchange, void * Comperand)
387 {
388 	void * retval = (void *)Comperand;
389 	__asm__("lock; cmpxchgl %k[Exchange], %[Destination]" : [retval] "=a" (retval) : "[retval]" (retval), [Destination] "m" (*Destination), [Exchange] "q" (Exchange) : "memory");
390 	return retval;
391 }
392 #endif
393 
394 #if !HAS_BUILTIN(_InterlockedExchange8)
395 __INTRIN_INLINE char _InterlockedExchange8(volatile char * Target, char Value)
396 {
397 	char retval = Value;
398 	__asm__("xchgb %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
399 	return retval;
400 }
401 #endif
402 
403 #if !HAS_BUILTIN(_InterlockedExchange16)
404 __INTRIN_INLINE short _InterlockedExchange16(volatile short * Target, short Value)
405 {
406 	short retval = Value;
407 	__asm__("xchgw %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
408 	return retval;
409 }
410 #endif
411 
412 #if !HAS_BUILTIN(_InterlockedExchange)
413 __INTRIN_INLINE long _InterlockedExchange(volatile long * Target, long Value)
414 {
415 	long retval = Value;
416 	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
417 	return retval;
418 }
419 #endif
420 
421 #if !HAS_BUILTIN(_InterlockedExchangePointer)
422 __INTRIN_INLINE void * _InterlockedExchangePointer(void * volatile * Target, void * Value)
423 {
424 	void * retval = Value;
425 	__asm__("xchgl %[retval], %[Target]" : [retval] "+r" (retval) : [Target] "m" (*Target) : "memory");
426 	return retval;
427 }
428 #endif
429 
430 #if !HAS_BUILTIN(_InterlockedExchangeAdd8)
431 __INTRIN_INLINE char _InterlockedExchangeAdd8(char volatile * Addend, char Value)
432 {
433 	char retval = Value;
434 	__asm__("lock; xaddb %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
435 	return retval;
436 }
437 #endif
438 
439 #if !HAS_BUILTIN(_InterlockedExchangeAdd16)
440 __INTRIN_INLINE short _InterlockedExchangeAdd16(volatile short * Addend, short Value)
441 {
442 	short retval = Value;
443 	__asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
444 	return retval;
445 }
446 #endif
447 
448 #if !HAS_BUILTIN(_InterlockedExchangeAdd)
449 __INTRIN_INLINE long _InterlockedExchangeAdd(volatile long * Addend, long Value)
450 {
451 	long retval = Value;
452 	__asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
453 	return retval;
454 }
455 #endif
456 
457 #if !HAS_BUILTIN(_InterlockedAnd8)
458 __INTRIN_INLINE char _InterlockedAnd8(volatile char * value, char mask)
459 {
460 	char x;
461 	char y;
462 
463 	y = *value;
464 
465 	do
466 	{
467 		x = y;
468 		y = _InterlockedCompareExchange8(value, x & mask, x);
469 	}
470 	while(y != x);
471 
472 	return y;
473 }
474 #endif
475 
476 #if !HAS_BUILTIN(_InterlockedAnd16)
477 __INTRIN_INLINE short _InterlockedAnd16(volatile short * value, short mask)
478 {
479 	short x;
480 	short y;
481 
482 	y = *value;
483 
484 	do
485 	{
486 		x = y;
487 		y = _InterlockedCompareExchange16(value, x & mask, x);
488 	}
489 	while(y != x);
490 
491 	return y;
492 }
493 #endif
494 
495 #if !HAS_BUILTIN(_InterlockedAnd)
496 __INTRIN_INLINE long _InterlockedAnd(volatile long * value, long mask)
497 {
498 	long x;
499 	long y;
500 
501 	y = *value;
502 
503 	do
504 	{
505 		x = y;
506 		y = _InterlockedCompareExchange(value, x & mask, x);
507 	}
508 	while(y != x);
509 
510 	return y;
511 }
512 #endif
513 
514 #if !HAS_BUILTIN(_InterlockedOr8)
515 __INTRIN_INLINE char _InterlockedOr8(volatile char * value, char mask)
516 {
517 	char x;
518 	char y;
519 
520 	y = *value;
521 
522 	do
523 	{
524 		x = y;
525 		y = _InterlockedCompareExchange8(value, x | mask, x);
526 	}
527 	while(y != x);
528 
529 	return y;
530 }
531 #endif
532 
533 #if !HAS_BUILTIN(_InterlockedOr16)
534 __INTRIN_INLINE short _InterlockedOr16(volatile short * value, short mask)
535 {
536 	short x;
537 	short y;
538 
539 	y = *value;
540 
541 	do
542 	{
543 		x = y;
544 		y = _InterlockedCompareExchange16(value, x | mask, x);
545 	}
546 	while(y != x);
547 
548 	return y;
549 }
550 #endif
551 
552 #if !HAS_BUILTIN(_InterlockedOr)
553 __INTRIN_INLINE long _InterlockedOr(volatile long * value, long mask)
554 {
555 	long x;
556 	long y;
557 
558 	y = *value;
559 
560 	do
561 	{
562 		x = y;
563 		y = _InterlockedCompareExchange(value, x | mask, x);
564 	}
565 	while(y != x);
566 
567 	return y;
568 }
569 #endif
570 
571 #if !HAS_BUILTIN(_InterlockedXor8)
572 __INTRIN_INLINE char _InterlockedXor8(volatile char * value, char mask)
573 {
574 	char x;
575 	char y;
576 
577 	y = *value;
578 
579 	do
580 	{
581 		x = y;
582 		y = _InterlockedCompareExchange8(value, x ^ mask, x);
583 	}
584 	while(y != x);
585 
586 	return y;
587 }
588 #endif
589 
590 #if !HAS_BUILTIN(_InterlockedXor16)
591 __INTRIN_INLINE short _InterlockedXor16(volatile short * value, short mask)
592 {
593 	short x;
594 	short y;
595 
596 	y = *value;
597 
598 	do
599 	{
600 		x = y;
601 		y = _InterlockedCompareExchange16(value, x ^ mask, x);
602 	}
603 	while(y != x);
604 
605 	return y;
606 }
607 #endif
608 
609 #if !HAS_BUILTIN(_InterlockedXor)
610 __INTRIN_INLINE long _InterlockedXor(volatile long * value, long mask)
611 {
612 	long x;
613 	long y;
614 
615 	y = *value;
616 
617 	do
618 	{
619 		x = y;
620 		y = _InterlockedCompareExchange(value, x ^ mask, x);
621 	}
622 	while(y != x);
623 
624 	return y;
625 }
626 #endif
627 
628 #if !HAS_BUILTIN(_InterlockedDecrement)
629 __INTRIN_INLINE long _InterlockedDecrement(volatile long * lpAddend)
630 {
631 	return _InterlockedExchangeAdd(lpAddend, -1) - 1;
632 }
633 #endif
634 
635 #if !HAS_BUILTIN(_InterlockedIncrement)
636 __INTRIN_INLINE long _InterlockedIncrement(volatile long * lpAddend)
637 {
638 	return _InterlockedExchangeAdd(lpAddend, 1) + 1;
639 }
640 #endif
641 
642 #if !HAS_BUILTIN(_InterlockedDecrement16)
643 __INTRIN_INLINE short _InterlockedDecrement16(volatile short * lpAddend)
644 {
645 	return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
646 }
647 #endif
648 
649 #if !HAS_BUILTIN(_InterlockedIncrement16)
650 __INTRIN_INLINE short _InterlockedIncrement16(volatile short * lpAddend)
651 {
652 	return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
653 }
654 #endif
655 
656 #if defined(__x86_64__)
657 #if !HAS_BUILTIN(_InterlockedDecrement64)
658 __INTRIN_INLINE long long _InterlockedDecrement64(volatile long long * lpAddend)
659 {
660 	return _InterlockedExchangeAdd64(lpAddend, -1) - 1;
661 }
662 #endif
663 
664 #if !HAS_BUILTIN(_InterlockedIncrement64)
665 __INTRIN_INLINE long long _InterlockedIncrement64(volatile long long * lpAddend)
666 {
667 	return _InterlockedExchangeAdd64(lpAddend, 1) + 1;
668 }
669 #endif
670 #endif
671 
672 #endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 */
673 
674 #if !HAS_BUILTIN(_InterlockedCompareExchange64)
675 #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__)
676 
677 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, long long Exchange, long long Comperand)
678 {
679 	return __sync_val_compare_and_swap(Destination, Comperand, Exchange);
680 }
681 
682 #else /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__) */
683 __INTRIN_INLINE long long _InterlockedCompareExchange64(volatile long long * Destination, long long Exchange, long long Comperand)
684 {
685 	long long retval = Comperand;
686 
687 	__asm__
688 	(
689 		"lock; cmpxchg8b %[Destination]" :
690 		[retval] "+A" (retval) :
691 			[Destination] "m" (*Destination),
692 			"b" ((unsigned long)((Exchange >>  0) & 0xFFFFFFFF)),
693 			"c" ((unsigned long)((Exchange >> 32) & 0xFFFFFFFF)) :
694 		"memory"
695 	);
696 
697 	return retval;
698 }
699 #endif /* (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 && defined(__x86_64__) */
700 #endif /* !HAS_BUILTIN(_InterlockedCompareExchange64) */
701 
702 #if defined(__x86_64__) && !HAS_BUILTIN(_InterlockedCompareExchange128)
703 __INTRIN_INLINE unsigned char _InterlockedCompareExchange128(_Interlocked_operand_ __int64 volatile* Destination, __int64 ExchangeHigh, __int64 ExchangeLow, __int64* ComparandResult)
704 {
705     __int64 xchg[2] = { ExchangeLow, ExchangeHigh };
706     return __sync_bool_compare_and_swap((__uint128_t*)Destination, *((__uint128_t*)ComparandResult), *((__uint128_t*)xchg));
707 }
708 #endif
709 
#ifdef __i386__
/*
 * Adds Value to the 64-bit counter at Addend in two steps: a locked add on
 * the low dword and, only if that add carried, a locked add-with-carry of 0
 * on the high dword. Returns Value.
 */
__INTRIN_INLINE long _InterlockedAddLargeStatistic(volatile long long * Addend, long Value)
{
	__asm__("lock; addl %[Value], %[Lo32];"
		"jae LABEL%=;"
		"lock; adcl $0, %[Hi32];"
		"LABEL%=:;"
		: [Lo32] "+m" (*((volatile long *)(Addend) + 0)), [Hi32] "+m" (*((volatile long *)(Addend) + 1))
		: [Value] "ir" (Value)
		: "memory");

	return Value;
}
#endif /* __i386__ */
727 
728 #if !HAS_BUILTIN(_interlockedbittestandreset)
729 __INTRIN_INLINE unsigned char _interlockedbittestandreset(volatile long * a, long b)
730 {
731 	unsigned char retval;
732 	__asm__("lock; btrl %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
733 	return retval;
734 }
735 #endif
736 
737 #if defined(__x86_64__) && !HAS_BUILTIN(_interlockedbittestandreset64)
738 __INTRIN_INLINE unsigned char _interlockedbittestandreset64(volatile long long * a, long long b)
739 {
740 	unsigned char retval;
741 	__asm__("lock; btrq %[b], %[a]; setb %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
742 	return retval;
743 }
744 
745 #endif
746 
747 #if !HAS_BUILTIN(_interlockedbittestandset)
748 __INTRIN_INLINE unsigned char _interlockedbittestandset(volatile long * a, long b)
749 {
750 	unsigned char retval;
751 	__asm__("lock; btsl %[b], %[a]; setc %b[retval]" : [retval] "=q" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
752 	return retval;
753 }
754 #endif
755 
756 #if defined(__x86_64__) && !HAS_BUILTIN(_interlockedbittestandset64)
757 __INTRIN_INLINE unsigned char _interlockedbittestandset64(volatile long long * a, long long b)
758 {
759 	unsigned char retval;
760 	__asm__("lock; btsq %[b], %[a]; setc %b[retval]" : [retval] "=r" (retval), [a] "+m" (*a) : [b] "Ir" (b) : "memory");
761 	return retval;
762 }
763 #endif
764 
765 /*** String operations ***/
766 
767 #if !HAS_BUILTIN(__stosb)
768 /* NOTE: we don't set a memory clobber in the __stosX functions because Visual C++ doesn't */
769 __INTRIN_INLINE void __stosb(unsigned char * Dest, unsigned char Data, size_t Count)
770 {
771 	__asm__ __volatile__
772 	(
773 		"rep; stosb" :
774 		[Dest] "=D" (Dest), [Count] "=c" (Count) :
775 		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
776 	);
777 }
778 #endif
779 
780 __INTRIN_INLINE void __stosw(unsigned short * Dest, unsigned short Data, size_t Count)
781 {
782 	__asm__ __volatile__
783 	(
784 		"rep; stosw" :
785 		[Dest] "=D" (Dest), [Count] "=c" (Count) :
786 		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
787 	);
788 }
789 
790 __INTRIN_INLINE void __stosd(unsigned long * Dest, unsigned long Data, size_t Count)
791 {
792 	__asm__ __volatile__
793 	(
794 		"rep; stosl" :
795 		[Dest] "=D" (Dest), [Count] "=c" (Count) :
796 		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
797 	);
798 }
799 
800 #ifdef __x86_64__
801 __INTRIN_INLINE void __stosq(unsigned long long * Dest, unsigned long long Data, size_t Count)
802 {
803 	__asm__ __volatile__
804 	(
805 		"rep; stosq" :
806 		[Dest] "=D" (Dest), [Count] "=c" (Count) :
807 		"[Dest]" (Dest), "a" (Data), "[Count]" (Count)
808 	);
809 }
810 #endif
811 
812 __INTRIN_INLINE void __movsb(unsigned char * Destination, const unsigned char * Source, size_t Count)
813 {
814 	__asm__ __volatile__
815 	(
816 		"rep; movsb" :
817 		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
818 		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
819 	);
820 }
821 
822 __INTRIN_INLINE void __movsw(unsigned short * Destination, const unsigned short * Source, size_t Count)
823 {
824 	__asm__ __volatile__
825 	(
826 		"rep; movsw" :
827 		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
828 		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
829 	);
830 }
831 
832 __INTRIN_INLINE void __movsd(unsigned long * Destination, const unsigned long * Source, size_t Count)
833 {
834 	__asm__ __volatile__
835 	(
836 		"rep; movsl" :
837 		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
838 		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
839 	);
840 }
841 
842 #ifdef __x86_64__
843 __INTRIN_INLINE void __movsq(unsigned long long * Destination, const unsigned long long * Source, size_t Count)
844 {
845 	__asm__ __volatile__
846 	(
847 		"rep; movsq" :
848 		[Destination] "=D" (Destination), [Source] "=S" (Source), [Count] "=c" (Count) :
849 		"[Destination]" (Destination), "[Source]" (Source), "[Count]" (Count)
850 	);
851 }
852 #endif
853 
854 #if defined(__x86_64__)
855 
856 /*** GS segment addressing ***/
857 
858 __INTRIN_INLINE void __writegsbyte(unsigned long Offset, unsigned char Data)
859 {
860 	__asm__ __volatile__("movb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
861 }
862 
863 __INTRIN_INLINE void __writegsword(unsigned long Offset, unsigned short Data)
864 {
865 	__asm__ __volatile__("movw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
866 }
867 
868 __INTRIN_INLINE void __writegsdword(unsigned long Offset, unsigned long Data)
869 {
870 	__asm__ __volatile__("movl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
871 }
872 
873 __INTRIN_INLINE void __writegsqword(unsigned long Offset, unsigned long long Data)
874 {
875 	__asm__ __volatile__("movq %q[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
876 }
877 
878 #if !HAS_BUILTIN(__readgsbyte)
879 __INTRIN_INLINE unsigned char __readgsbyte(unsigned long Offset)
880 {
881 	unsigned char value;
882 	__asm__ __volatile__("movb %%gs:%a[Offset], %b[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
883 	return value;
884 }
885 #endif
886 
887 #if !HAS_BUILTIN(__readgsword)
888 __INTRIN_INLINE unsigned short __readgsword(unsigned long Offset)
889 {
890 	unsigned short value;
891 	__asm__ __volatile__("movw %%gs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
892 	return value;
893 }
894 #endif
895 
896 #if !HAS_BUILTIN(__readgsdword)
897 __INTRIN_INLINE unsigned long __readgsdword(unsigned long Offset)
898 {
899 	unsigned long value;
900 	__asm__ __volatile__("movl %%gs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
901 	return value;
902 }
903 #endif
904 
905 #if !HAS_BUILTIN(__readgsqword)
906 __INTRIN_INLINE unsigned long long __readgsqword(unsigned long Offset)
907 {
908 	unsigned long long value;
909 	__asm__ __volatile__("movq %%gs:%a[Offset], %q[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
910 	return value;
911 }
912 #endif
913 
914 __INTRIN_INLINE void __incgsbyte(unsigned long Offset)
915 {
916 	__asm__ __volatile__("incb %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
917 }
918 
919 __INTRIN_INLINE void __incgsword(unsigned long Offset)
920 {
921 	__asm__ __volatile__("incw %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
922 }
923 
924 __INTRIN_INLINE void __incgsdword(unsigned long Offset)
925 {
926 	__asm__ __volatile__("incl %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
927 }
928 
929 __INTRIN_INLINE void __incgsqword(unsigned long Offset)
930 {
931 	__asm__ __volatile__("incq %%gs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
932 }
933 
934 __INTRIN_INLINE void __addgsbyte(unsigned long Offset, unsigned char Data)
935 {
936 	__asm__ __volatile__("addb %b[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
937 }
938 
939 __INTRIN_INLINE void __addgsword(unsigned long Offset, unsigned short Data)
940 {
941 	__asm__ __volatile__("addw %w[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
942 }
943 
944 __INTRIN_INLINE void __addgsdword(unsigned long Offset, unsigned long Data)
945 {
946 	__asm__ __volatile__("addl %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
947 }
948 
949 __INTRIN_INLINE void __addgsqword(unsigned long Offset, unsigned long long Data)
950 {
951 	__asm__ __volatile__("addq %k[Data], %%gs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
952 }
953 
954 #else /* defined(__x86_64__) */
955 
956 /*** FS segment addressing ***/
957 
958 __INTRIN_INLINE void __writefsbyte(unsigned long Offset, unsigned char Data)
959 {
960 	__asm__ __volatile__("movb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
961 }
962 
963 __INTRIN_INLINE void __writefsword(unsigned long Offset, unsigned short Data)
964 {
965 	__asm__ __volatile__("movw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
966 }
967 
968 __INTRIN_INLINE void __writefsdword(unsigned long Offset, unsigned long Data)
969 {
970 	__asm__ __volatile__("movl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "ir" (Data) : "memory");
971 }
972 
973 #if !HAS_BUILTIN(__readfsbyte)
974 __INTRIN_INLINE unsigned char __readfsbyte(unsigned long Offset)
975 {
976 	unsigned char value;
977 	__asm__ __volatile__("movb %%fs:%a[Offset], %b[value]" : [value] "=q" (value) : [Offset] "ir" (Offset));
978 	return value;
979 }
980 #endif
981 
982 #if !HAS_BUILTIN(__readfsword)
983 __INTRIN_INLINE unsigned short __readfsword(unsigned long Offset)
984 {
985 	unsigned short value;
986 	__asm__ __volatile__("movw %%fs:%a[Offset], %w[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
987 	return value;
988 }
989 #endif
990 
991 #if !HAS_BUILTIN(__readfsdword)
992 __INTRIN_INLINE unsigned long __readfsdword(unsigned long Offset)
993 {
994 	unsigned long value;
995 	__asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
996 	return value;
997 }
998 #endif
999 
1000 __INTRIN_INLINE void __incfsbyte(unsigned long Offset)
1001 {
1002 	__asm__ __volatile__("incb %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
1003 }
1004 
1005 __INTRIN_INLINE void __incfsword(unsigned long Offset)
1006 {
1007 	__asm__ __volatile__("incw %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
1008 }
1009 
1010 __INTRIN_INLINE void __incfsdword(unsigned long Offset)
1011 {
1012 	__asm__ __volatile__("incl %%fs:%a[Offset]" : : [Offset] "ir" (Offset) : "memory");
1013 }
1014 
1015 /* NOTE: the bizarre implementation of __addfsxxx mimics the broken Visual C++ behavior */
1016 __INTRIN_INLINE void __addfsbyte(unsigned long Offset, unsigned char Data)
1017 {
1018 	if(!__builtin_constant_p(Offset))
1019 		__asm__ __volatile__("addb %b[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
1020 	else
1021 		__asm__ __volatile__("addb %b[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
1022 }
1023 
1024 __INTRIN_INLINE void __addfsword(unsigned long Offset, unsigned short Data)
1025 {
1026 	if(!__builtin_constant_p(Offset))
1027 		__asm__ __volatile__("addw %w[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
1028 	else
1029 		__asm__ __volatile__("addw %w[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
1030 }
1031 
1032 __INTRIN_INLINE void __addfsdword(unsigned long Offset, unsigned long Data)
1033 {
1034 	if(!__builtin_constant_p(Offset))
1035 		__asm__ __volatile__("addl %k[Offset], %%fs:%a[Offset]" : : [Offset] "r" (Offset) : "memory");
1036 	else
1037 		__asm__ __volatile__("addl %k[Data], %%fs:%a[Offset]" : : [Offset] "ir" (Offset), [Data] "iq" (Data) : "memory");
1038 }
1039 
1040 #endif /* defined(__x86_64__) */
1041 
1042 
1043 /*** Bit manipulation ***/
1044 
1045 #if !HAS_BUILTIN(_BitScanForward)
1046 __INTRIN_INLINE unsigned char _BitScanForward(unsigned long * Index, unsigned long Mask)
1047 {
1048 	__asm__("bsfl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
1049 	return Mask ? 1 : 0;
1050 }
1051 #endif
1052 
1053 #if !HAS_BUILTIN(_BitScanReverse)
1054 __INTRIN_INLINE unsigned char _BitScanReverse(unsigned long * Index, unsigned long Mask)
1055 {
1056 	__asm__("bsrl %[Mask], %[Index]" : [Index] "=r" (*Index) : [Mask] "mr" (Mask));
1057 	return Mask ? 1 : 0;
1058 }
1059 #endif
1060 
1061 #if !HAS_BUILTIN(_bittest)
1062 /* NOTE: again, the bizarre implementation follows Visual C++ */
1063 __INTRIN_INLINE unsigned char _bittest(const long * a, long b)
1064 {
1065 	unsigned char retval;
1066 
1067 	if(__builtin_constant_p(b))
1068 		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 32))), [b] "Ir" (b % 32));
1069 	else
1070 		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
1071 
1072 	return retval;
1073 }
1074 #endif
1075 
1076 #ifdef __x86_64__
1077 #if !HAS_BUILTIN(_BitScanForward64)
1078 __INTRIN_INLINE unsigned char _BitScanForward64(unsigned long * Index, unsigned long long Mask)
1079 {
1080 	unsigned long long Index64;
1081 	__asm__("bsfq %[Mask], %[Index]" : [Index] "=r" (Index64) : [Mask] "mr" (Mask));
1082 	*Index = Index64;
1083 	return Mask ? 1 : 0;
1084 }
1085 #endif
1086 
1087 #if !HAS_BUILTIN(_BitScanReverse64)
1088 __INTRIN_INLINE unsigned char _BitScanReverse64(unsigned long * Index, unsigned long long Mask)
1089 {
1090 	unsigned long long Index64;
1091 	__asm__("bsrq %[Mask], %[Index]" : [Index] "=r" (Index64) : [Mask] "mr" (Mask));
1092 	*Index = Index64;
1093 	return Mask ? 1 : 0;
1094 }
1095 #endif
1096 
1097 #if !HAS_BUILTIN(_bittest64)
1098 __INTRIN_INLINE unsigned char _bittest64(const long long * a, long long b)
1099 {
1100 	unsigned char retval;
1101 
1102 	if(__builtin_constant_p(b))
1103 		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "mr" (*(a + (b / 64))), [b] "Ir" (b % 64));
1104 	else
1105 		__asm__("bt %[b], %[a]; setb %b[retval]" : [retval] "=q" (retval) : [a] "m" (*a), [b] "r" (b));
1106 
1107 	return retval;
1108 }
1109 #endif
1110 #endif
1111 
1112 #if !HAS_BUILTIN(_bittestandcomplement)
1113 __INTRIN_INLINE unsigned char _bittestandcomplement(long * a, long b)
1114 {
1115 	unsigned char retval;
1116 
1117 	if(__builtin_constant_p(b))
1118 		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1119 	else
1120 		__asm__("btc %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1121 
1122 	return retval;
1123 }
1124 #endif
1125 
1126 #if !HAS_BUILTIN(_bittestandreset)
1127 __INTRIN_INLINE unsigned char _bittestandreset(long * a, long b)
1128 {
1129 	unsigned char retval;
1130 
1131 	if(__builtin_constant_p(b))
1132 		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1133 	else
1134 		__asm__("btr %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1135 
1136 	return retval;
1137 }
1138 #endif
1139 
1140 #if !HAS_BUILTIN(_bittestandset)
1141 __INTRIN_INLINE unsigned char _bittestandset(long * a, long b)
1142 {
1143 	unsigned char retval;
1144 
1145 	if(__builtin_constant_p(b))
1146 		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 32))), [retval] "=q" (retval) : [b] "Ir" (b % 32));
1147 	else
1148 		__asm__("bts %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1149 
1150 	return retval;
1151 }
1152 #endif
1153 
1154 #ifdef __x86_64__
1155 #if !HAS_BUILTIN(_bittestandset64)
1156 __INTRIN_INLINE unsigned char _bittestandset64(long long * a, long long b)
1157 {
1158 	unsigned char retval;
1159 
1160 	if(__builtin_constant_p(b))
1161 		__asm__("btsq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1162 	else
1163 		__asm__("btsq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1164 
1165 	return retval;
1166 }
1167 #endif
1168 
1169 #if !HAS_BUILTIN(_bittestandreset64)
1170 __INTRIN_INLINE unsigned char _bittestandreset64(long long * a, long long b)
1171 {
1172 	unsigned char retval;
1173 
1174 	if(__builtin_constant_p(b))
1175 		__asm__("btrq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1176 	else
1177 		__asm__("btrq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1178 
1179 	return retval;
1180 }
1181 #endif
1182 
1183 #if !HAS_BUILTIN(_bittestandcomplement64)
1184 __INTRIN_INLINE unsigned char _bittestandcomplement64(long long * a, long long b)
1185 {
1186 	unsigned char retval;
1187 
1188 	if(__builtin_constant_p(b))
1189 		__asm__("btcq %[b], %[a]; setb %b[retval]" : [a] "+mr" (*(a + (b / 64))), [retval] "=q" (retval) : [b] "Ir" (b % 64));
1190 	else
1191 		__asm__("btcq %[b], %[a]; setb %b[retval]" : [a] "+m" (*a), [retval] "=q" (retval) : [b] "r" (b));
1192 
1193 	return retval;
1194 }
1195 #endif
1196 #endif /* __x86_64__ */
1197 
1198 #if !HAS_BUILTIN(_rotl8)
1199 __INTRIN_INLINE unsigned char __cdecl _rotl8(unsigned char value, unsigned char shift)
1200 {
1201 	unsigned char retval;
1202 	__asm__("rolb %b[shift], %b[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1203 	return retval;
1204 }
1205 #endif
1206 
1207 #if !HAS_BUILTIN(_rotl16)
1208 __INTRIN_INLINE unsigned short __cdecl _rotl16(unsigned short value, unsigned char shift)
1209 {
1210 	unsigned short retval;
1211 	__asm__("rolw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1212 	return retval;
1213 }
1214 #endif
1215 
1216 #if !HAS_BUILTIN(_rotl)
1217 __INTRIN_INLINE unsigned int __cdecl _rotl(unsigned int value, int shift)
1218 {
1219 	unsigned int retval;
1220 	__asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1221 	return retval;
1222 }
1223 #endif
1224 
1225 #if !HAS_BUILTIN(_rotl64)
1226 #ifdef __x86_64__
1227 __INTRIN_INLINE unsigned long long _rotl64(unsigned long long value, int shift)
1228 {
1229 	unsigned long long retval;
1230 	__asm__("rolq %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1231 	return retval;
1232 }
1233 #else /* __x86_64__ */
1234 __INTRIN_INLINE unsigned long long __cdecl _rotl64(unsigned long long value, int shift)
1235 {
1236     /* FIXME: this is probably not optimal */
1237     return (value << shift) | (value >> (64 - shift));
1238 }
1239 #endif /* __x86_64__ */
1240 #endif /* !HAS_BUILTIN(_rotl64) */
1241 
1242 #if !HAS_BUILTIN(_rotr)
1243 __INTRIN_INLINE unsigned int __cdecl _rotr(unsigned int value, int shift)
1244 {
1245 	unsigned int retval;
1246 	__asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1247 	return retval;
1248 }
1249 #endif
1250 
1251 #if !HAS_BUILTIN(_rotr8)
1252 __INTRIN_INLINE unsigned char __cdecl _rotr8(unsigned char value, unsigned char shift)
1253 {
1254 	unsigned char retval;
1255 	__asm__("rorb %b[shift], %b[retval]" : [retval] "=qm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1256 	return retval;
1257 }
1258 #endif
1259 
1260 #if !HAS_BUILTIN(_rotr16)
1261 __INTRIN_INLINE unsigned short __cdecl _rotr16(unsigned short value, unsigned char shift)
1262 {
1263 	unsigned short retval;
1264 	__asm__("rorw %b[shift], %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1265 	return retval;
1266 }
1267 #endif
1268 
1269 #if !HAS_BUILTIN(_rotr64)
1270 #ifdef __x86_64__
1271 __INTRIN_INLINE unsigned long long _rotr64(unsigned long long value, int shift)
1272 {
1273 	unsigned long long retval;
1274 	__asm__("rorq %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1275 	return retval;
1276 }
1277 #else /* __x86_64__ */
1278 __INTRIN_INLINE unsigned long long __cdecl _rotr64(unsigned long long value, int shift)
1279 {
1280     /* FIXME: this is probably not optimal */
1281     return (value >> shift) | (value << (64 - shift));
1282 }
1283 #endif /* __x86_64__ */
1284 #endif /* !HAS_BUILTIN(_rotr64) */
1285 
1286 #if !HAS_BUILTIN(_lrotl)
1287 __INTRIN_INLINE unsigned long __cdecl _lrotl(unsigned long value, int shift)
1288 {
1289 	unsigned long retval;
1290 	__asm__("roll %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1291 	return retval;
1292 }
1293 #endif
1294 
1295 #if !HAS_BUILTIN(_lrotr)
1296 __INTRIN_INLINE unsigned long __cdecl _lrotr(unsigned long value, int shift)
1297 {
1298 	unsigned long retval;
1299 	__asm__("rorl %b[shift], %k[retval]" : [retval] "=rm" (retval) : "[retval]" (value), [shift] "Nc" (shift));
1300 	return retval;
1301 }
1302 #endif
1303 
1304 #ifdef __x86_64__
1305 __INTRIN_INLINE unsigned long long __ll_lshift(unsigned long long Mask, int Bit)
1306 {
1307     unsigned long long retval;
1308     unsigned char shift = Bit & 0x3F;
1309 
1310     __asm__
1311     (
1312         "shlq %[shift], %[Mask]" : "=r"(retval) : [Mask] "0"(Mask), [shift] "c"(shift)
1313     );
1314 
1315     return retval;
1316 }
1317 
1318 __INTRIN_INLINE long long __ll_rshift(long long Mask, int Bit)
1319 {
1320     long long retval;
1321     unsigned char shift = Bit & 0x3F;
1322 
1323     __asm__
1324     (
1325         "sarq %[shift], %[Mask]" : "=r"(retval) : [Mask] "0"(Mask), [shift] "c"(shift)
1326     );
1327 
1328     return retval;
1329 }
1330 
1331 __INTRIN_INLINE unsigned long long __ull_rshift(unsigned long long Mask, int Bit)
1332 {
1333     long long retval;
1334     unsigned char shift = Bit & 0x3F;
1335 
1336     __asm__
1337     (
1338         "shrq %[shift], %[Mask]" : "=r"(retval) : [Mask] "0"(Mask), [shift] "c"(shift)
1339     );
1340 
1341     return retval;
1342 }
1343 #else
1344 /*
1345 	NOTE: in __ll_lshift, __ll_rshift and __ull_rshift we use the "A"
1346 	constraint (edx:eax) for the Mask argument, because it's the only way GCC
1347 	can pass 64-bit operands around - passing the two 32 bit parts separately
1348 	just confuses it. Also we declare Bit as an int and then truncate it to
1349 	match Visual C++ behavior
1350 */
1351 __INTRIN_INLINE unsigned long long __ll_lshift(unsigned long long Mask, int Bit)
1352 {
1353 	unsigned long long retval = Mask;
1354 
1355 	__asm__
1356 	(
1357 		"shldl %b[Bit], %%eax, %%edx; sall %b[Bit], %%eax" :
1358 		"+A" (retval) :
1359 		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1360 	);
1361 
1362 	return retval;
1363 }
1364 
1365 __INTRIN_INLINE long long __ll_rshift(long long Mask, int Bit)
1366 {
1367 	long long retval = Mask;
1368 
1369 	__asm__
1370 	(
1371 		"shrdl %b[Bit], %%edx, %%eax; sarl %b[Bit], %%edx" :
1372 		"+A" (retval) :
1373 		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1374 	);
1375 
1376 	return retval;
1377 }
1378 
1379 __INTRIN_INLINE unsigned long long __ull_rshift(unsigned long long Mask, int Bit)
1380 {
1381 	unsigned long long retval = Mask;
1382 
1383 	__asm__
1384 	(
1385 		"shrdl %b[Bit], %%edx, %%eax; shrl %b[Bit], %%edx" :
1386 		"+A" (retval) :
1387 		[Bit] "Nc" ((unsigned char)((unsigned long)Bit) & 0xFF)
1388 	);
1389 
1390 	return retval;
1391 }
1392 #endif
1393 
1394 __INTRIN_INLINE unsigned short __cdecl _byteswap_ushort(unsigned short value)
1395 {
1396 	unsigned short retval;
1397 	__asm__("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (value));
1398 	return retval;
1399 }
1400 
1401 __INTRIN_INLINE unsigned long __cdecl _byteswap_ulong(unsigned long value)
1402 {
1403 	unsigned long retval;
1404 	__asm__("bswapl %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1405 	return retval;
1406 }
1407 
1408 #ifdef __x86_64__
1409 __INTRIN_INLINE unsigned long long _byteswap_uint64(unsigned long long value)
1410 {
1411 	unsigned long long retval;
1412 	__asm__("bswapq %[retval]" : [retval] "=r" (retval) : "[retval]" (value));
1413 	return retval;
1414 }
1415 #else
1416 __INTRIN_INLINE unsigned long long __cdecl _byteswap_uint64(unsigned long long value)
1417 {
1418 	union {
1419 		unsigned long long int64part;
1420 		struct {
1421 			unsigned long lowpart;
1422 			unsigned long hipart;
1423 		};
1424 	} retval;
1425 	retval.int64part = value;
1426 	__asm__("bswapl %[lowpart]\n"
1427 	        "bswapl %[hipart]\n"
1428 	        : [lowpart] "=r" (retval.hipart), [hipart] "=r" (retval.lowpart)  : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart) );
1429 	return retval.int64part;
1430 }
1431 #endif
1432 
1433 #if !HAS_BUILTIN(__lzcnt)
1434 __INTRIN_INLINE unsigned int __lzcnt(unsigned int value)
1435 {
1436 	return __builtin_clz(value);
1437 }
1438 #endif
1439 
1440 #if !HAS_BUILTIN(__lzcnt16)
1441 __INTRIN_INLINE unsigned short __lzcnt16(unsigned short value)
1442 {
1443 	return __builtin_clz(value);
1444 }
1445 #endif
1446 
1447 #if !HAS_BUILTIN(__popcnt)
1448 __INTRIN_INLINE unsigned int __popcnt(unsigned int value)
1449 {
1450 	return __builtin_popcount(value);
1451 }
1452 #endif
1453 
1454 #if !HAS_BUILTIN(__popcnt16)
1455 __INTRIN_INLINE unsigned short __popcnt16(unsigned short value)
1456 {
1457 	return __builtin_popcount(value);
1458 }
1459 #endif
1460 
1461 #ifdef __x86_64__
1462 #if !HAS_BUILTIN(__lzcnt64)
1463 __INTRIN_INLINE unsigned long long __lzcnt64(unsigned long long value)
1464 {
1465 	return __builtin_clzll(value);
1466 }
1467 #endif
1468 
1469 #if !HAS_BUILTIN(__popcnt64)
1470 __INTRIN_INLINE unsigned long long __popcnt64(unsigned long long value)
1471 {
1472 	return __builtin_popcountll(value);
1473 }
1474 #endif
1475 #endif
1476 
1477 /*** 64-bit math ***/
1478 
1479 #if !HAS_BUILTIN(__emul)
1480 __INTRIN_INLINE long long __emul(int a, int b)
1481 {
1482 	long long retval;
1483 	__asm__("imull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1484 	return retval;
1485 }
1486 #endif
1487 
1488 #if !HAS_BUILTIN(__emulu)
1489 __INTRIN_INLINE unsigned long long __emulu(unsigned int a, unsigned int b)
1490 {
1491 	unsigned long long retval;
1492 	__asm__("mull %[b]" : "=A" (retval) : [a] "a" (a), [b] "rm" (b));
1493 	return retval;
1494 }
1495 #endif
1496 
1497 __INTRIN_INLINE long long __cdecl _abs64(long long value)
1498 {
1499     return (value >= 0) ? value : -value;
1500 }
1501 
1502 #ifdef __x86_64__
1503 #if !HAS_BUILTIN(__mulh)
1504 __INTRIN_INLINE long long __mulh(long long a, long long b)
1505 {
1506 	long long retval;
1507 	__asm__("imulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1508 	return retval;
1509 }
1510 #endif
1511 
1512 #if !HAS_BUILTIN(__umulh)
1513 __INTRIN_INLINE unsigned long long __umulh(unsigned long long a, unsigned long long b)
1514 {
1515 	unsigned long long retval;
1516 	__asm__("mulq %[b]" : "=d" (retval) : [a] "a" (a), [b] "rm" (b));
1517 	return retval;
1518 }
1519 #endif
1520 #endif
1521 
1522 /*** Port I/O ***/
1523 
1524 __INTRIN_INLINE unsigned char __inbyte(unsigned short Port)
1525 {
1526 	unsigned char byte;
1527 	__asm__ __volatile__("inb %w[Port], %b[byte]" : [byte] "=a" (byte) : [Port] "Nd" (Port));
1528 	return byte;
1529 }
1530 
1531 __INTRIN_INLINE unsigned short __inword(unsigned short Port)
1532 {
1533 	unsigned short word;
1534 	__asm__ __volatile__("inw %w[Port], %w[word]" : [word] "=a" (word) : [Port] "Nd" (Port));
1535 	return word;
1536 }
1537 
1538 __INTRIN_INLINE unsigned long __indword(unsigned short Port)
1539 {
1540 	unsigned long dword;
1541 	__asm__ __volatile__("inl %w[Port], %k[dword]" : [dword] "=a" (dword) : [Port] "Nd" (Port));
1542 	return dword;
1543 }
1544 
1545 __INTRIN_INLINE void __inbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1546 {
1547 	__asm__ __volatile__
1548 	(
1549 		"rep; insb" :
1550 		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1551 		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1552 		"memory"
1553 	);
1554 }
1555 
1556 __INTRIN_INLINE void __inwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1557 {
1558 	__asm__ __volatile__
1559 	(
1560 		"rep; insw" :
1561 		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1562 		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1563 		"memory"
1564 	);
1565 }
1566 
1567 __INTRIN_INLINE void __indwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1568 {
1569 	__asm__ __volatile__
1570 	(
1571 		"rep; insl" :
1572 		[Buffer] "=D" (Buffer), [Count] "=c" (Count) :
1573 		"d" (Port), "[Buffer]" (Buffer), "[Count]" (Count) :
1574 		"memory"
1575 	);
1576 }
1577 
1578 __INTRIN_INLINE void __outbyte(unsigned short Port, unsigned char Data)
1579 {
1580 	__asm__ __volatile__("outb %b[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1581 }
1582 
1583 __INTRIN_INLINE void __outword(unsigned short Port, unsigned short Data)
1584 {
1585 	__asm__ __volatile__("outw %w[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1586 }
1587 
1588 __INTRIN_INLINE void __outdword(unsigned short Port, unsigned long Data)
1589 {
1590 	__asm__ __volatile__("outl %k[Data], %w[Port]" : : [Port] "Nd" (Port), [Data] "a" (Data));
1591 }
1592 
1593 __INTRIN_INLINE void __outbytestring(unsigned short Port, unsigned char * Buffer, unsigned long Count)
1594 {
1595 	__asm__ __volatile__("rep; outsb" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1596 }
1597 
1598 __INTRIN_INLINE void __outwordstring(unsigned short Port, unsigned short * Buffer, unsigned long Count)
1599 {
1600 	__asm__ __volatile__("rep; outsw" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1601 }
1602 
1603 __INTRIN_INLINE void __outdwordstring(unsigned short Port, unsigned long * Buffer, unsigned long Count)
1604 {
1605 	__asm__ __volatile__("rep; outsl" : : [Port] "d" (Port), [Buffer] "S" (Buffer), "c" (Count));
1606 }
1607 
1608 __INTRIN_INLINE int __cdecl _inp(unsigned short Port)
1609 {
1610 	return __inbyte(Port);
1611 }
1612 
1613 __INTRIN_INLINE unsigned short __cdecl _inpw(unsigned short Port)
1614 {
1615 	return __inword(Port);
1616 }
1617 
1618 __INTRIN_INLINE unsigned long __cdecl _inpd(unsigned short Port)
1619 {
1620 	return __indword(Port);
1621 }
1622 
1623 __INTRIN_INLINE int __cdecl _outp(unsigned short Port, int databyte)
1624 {
1625 	__outbyte(Port, (unsigned char)databyte);
1626 	return databyte;
1627 }
1628 
1629 __INTRIN_INLINE unsigned short __cdecl _outpw(unsigned short Port, unsigned short dataword)
1630 {
1631 	__outword(Port, dataword);
1632 	return dataword;
1633 }
1634 
1635 __INTRIN_INLINE unsigned long __cdecl _outpd(unsigned short Port, unsigned long dataword)
1636 {
1637 	__outdword(Port, dataword);
1638 	return dataword;
1639 }
1640 
1641 
1642 /*** System information ***/
1643 
1644 __INTRIN_INLINE void __cpuid(int CPUInfo[4], int InfoType)
1645 {
1646 	__asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType));
1647 }
1648 
1649 __INTRIN_INLINE void __cpuidex(int CPUInfo[4], int InfoType, int ECXValue)
1650 {
1651 	__asm__ __volatile__("cpuid" : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) : "a" (InfoType), "c" (ECXValue));
1652 }
1653 
1654 #if !HAS_BUILTIN(__rdtsc)
1655 __INTRIN_INLINE unsigned long long __rdtsc(void)
1656 {
1657 #ifdef __x86_64__
1658 	unsigned long long low, high;
1659 	__asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
1660 	return low | (high << 32);
1661 #else
1662 	unsigned long long retval;
1663 	__asm__ __volatile__("rdtsc" : "=A"(retval));
1664 	return retval;
1665 #endif
1666 }
1667 #endif /* !HAS_BUILTIN(__rdtsc) */
1668 
1669 __INTRIN_INLINE void __writeeflags(uintptr_t Value)
1670 {
1671 	__asm__ __volatile__("push %0\n popf" : : "rim"(Value));
1672 }
1673 
1674 __INTRIN_INLINE uintptr_t __readeflags(void)
1675 {
1676 	uintptr_t retval;
1677 	__asm__ __volatile__("pushf\n pop %0" : "=rm"(retval));
1678 	return retval;
1679 }
1680 
1681 /*** Interrupts ***/
1682 
1683 #if !HAS_BUILTIN(__debugbreak)
1684 __INTRIN_INLINE void __cdecl __debugbreak(void)
1685 {
1686 	__asm__("int $3");
1687 }
1688 #endif
1689 
1690 #if !HAS_BUILTIN(__ud2)
1691 __INTRIN_INLINE void __ud2(void)
1692 {
1693 	__asm__("ud2");
1694 }
1695 #endif
1696 
1697 #if !HAS_BUILTIN(__int2c)
1698 __INTRIN_INLINE void __int2c(void)
1699 {
1700 	__asm__("int $0x2c");
1701 }
1702 #endif
1703 
1704 __INTRIN_INLINE void __cdecl _disable(void)
1705 {
1706 	__asm__("cli" : : : "memory");
1707 }
1708 
1709 __INTRIN_INLINE void __cdecl _enable(void)
1710 {
1711 	__asm__("sti" : : : "memory");
1712 }
1713 
1714 __INTRIN_INLINE void __halt(void)
1715 {
1716 	__asm__("hlt" : : : "memory");
1717 }
1718 
1719 #if !HAS_BUILTIN(__fastfail)
1720 __declspec(noreturn)
1721 __INTRIN_INLINE void __fastfail(unsigned int Code)
1722 {
1723 	__asm__("int $0x29" : : "c"(Code) : "memory");
1724 	__builtin_unreachable();
1725 }
1726 #endif
1727 
1728 /*** Protected memory management ***/
1729 
1730 #ifdef __x86_64__
1731 
1732 __INTRIN_INLINE void __writecr0(unsigned long long Data)
1733 {
1734 	__asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1735 }
1736 
1737 __INTRIN_INLINE void __writecr3(unsigned long long Data)
1738 {
1739 	__asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1740 }
1741 
1742 __INTRIN_INLINE void __writecr4(unsigned long long Data)
1743 {
1744 	__asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1745 }
1746 
1747 __INTRIN_INLINE void __writecr8(unsigned long long Data)
1748 {
1749 	__asm__("mov %[Data], %%cr8" : : [Data] "r" (Data) : "memory");
1750 }
1751 
1752 __INTRIN_INLINE unsigned long long __readcr0(void)
1753 {
1754 	unsigned long long value;
1755 	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1756 	return value;
1757 }
1758 
1759 __INTRIN_INLINE unsigned long long __readcr2(void)
1760 {
1761 	unsigned long long value;
1762 	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1763 	return value;
1764 }
1765 
1766 __INTRIN_INLINE unsigned long long __readcr3(void)
1767 {
1768 	unsigned long long value;
1769 	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1770 	return value;
1771 }
1772 
1773 __INTRIN_INLINE unsigned long long __readcr4(void)
1774 {
1775 	unsigned long long value;
1776 	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1777 	return value;
1778 }
1779 
1780 __INTRIN_INLINE unsigned long long __readcr8(void)
1781 {
1782 	unsigned long long value;
1783 	__asm__ __volatile__("movq %%cr8, %q[value]" : [value] "=r" (value));
1784 	return value;
1785 }
1786 
1787 #else /* __x86_64__ */
1788 
1789 __INTRIN_INLINE void __writecr0(unsigned int Data)
1790 {
1791 	__asm__("mov %[Data], %%cr0" : : [Data] "r" (Data) : "memory");
1792 }
1793 
1794 __INTRIN_INLINE void __writecr3(unsigned int Data)
1795 {
1796 	__asm__("mov %[Data], %%cr3" : : [Data] "r" (Data) : "memory");
1797 }
1798 
1799 __INTRIN_INLINE void __writecr4(unsigned int Data)
1800 {
1801 	__asm__("mov %[Data], %%cr4" : : [Data] "r" (Data) : "memory");
1802 }
1803 
1804 __INTRIN_INLINE unsigned long __readcr0(void)
1805 {
1806 	unsigned long value;
1807 	__asm__ __volatile__("mov %%cr0, %[value]" : [value] "=r" (value));
1808 	return value;
1809 }
1810 
1811 __INTRIN_INLINE unsigned long __readcr2(void)
1812 {
1813 	unsigned long value;
1814 	__asm__ __volatile__("mov %%cr2, %[value]" : [value] "=r" (value));
1815 	return value;
1816 }
1817 
1818 __INTRIN_INLINE unsigned long __readcr3(void)
1819 {
1820 	unsigned long value;
1821 	__asm__ __volatile__("mov %%cr3, %[value]" : [value] "=r" (value));
1822 	return value;
1823 }
1824 
1825 __INTRIN_INLINE unsigned long __readcr4(void)
1826 {
1827 	unsigned long value;
1828 	__asm__ __volatile__("mov %%cr4, %[value]" : [value] "=r" (value));
1829 	return value;
1830 }
1831 
1832 #endif /* __x86_64__ */
1833 
1834 #ifdef __x86_64__
1835 
1836 __INTRIN_INLINE unsigned long long __readdr(unsigned int reg)
1837 {
1838 	unsigned long long value;
1839 	switch (reg)
1840 	{
1841 		case 0:
1842 			__asm__ __volatile__("movq %%dr0, %q[value]" : [value] "=r" (value));
1843 			break;
1844 		case 1:
1845 			__asm__ __volatile__("movq %%dr1, %q[value]" : [value] "=r" (value));
1846 			break;
1847 		case 2:
1848 			__asm__ __volatile__("movq %%dr2, %q[value]" : [value] "=r" (value));
1849 			break;
1850 		case 3:
1851 			__asm__ __volatile__("movq %%dr3, %q[value]" : [value] "=r" (value));
1852 			break;
1853 		case 4:
1854 			__asm__ __volatile__("movq %%dr4, %q[value]" : [value] "=r" (value));
1855 			break;
1856 		case 5:
1857 			__asm__ __volatile__("movq %%dr5, %q[value]" : [value] "=r" (value));
1858 			break;
1859 		case 6:
1860 			__asm__ __volatile__("movq %%dr6, %q[value]" : [value] "=r" (value));
1861 			break;
1862 		case 7:
1863 			__asm__ __volatile__("movq %%dr7, %q[value]" : [value] "=r" (value));
1864 			break;
1865 	}
1866 	return value;
1867 }
1868 
1869 __INTRIN_INLINE void __writedr(unsigned reg, unsigned long long value)
1870 {
1871 	switch (reg)
1872 	{
1873 		case 0:
1874 			__asm__("movq %q[value], %%dr0" : : [value] "r" (value) : "memory");
1875 			break;
1876 		case 1:
1877 			__asm__("movq %q[value], %%dr1" : : [value] "r" (value) : "memory");
1878 			break;
1879 		case 2:
1880 			__asm__("movq %q[value], %%dr2" : : [value] "r" (value) : "memory");
1881 			break;
1882 		case 3:
1883 			__asm__("movq %q[value], %%dr3" : : [value] "r" (value) : "memory");
1884 			break;
1885 		case 4:
1886 			__asm__("movq %q[value], %%dr4" : : [value] "r" (value) : "memory");
1887 			break;
1888 		case 5:
1889 			__asm__("movq %q[value], %%dr5" : : [value] "r" (value) : "memory");
1890 			break;
1891 		case 6:
1892 			__asm__("movq %q[value], %%dr6" : : [value] "r" (value) : "memory");
1893 			break;
1894 		case 7:
1895 			__asm__("movq %q[value], %%dr7" : : [value] "r" (value) : "memory");
1896 			break;
1897 	}
1898 }
1899 
1900 #else /* __x86_64__ */
1901 
1902 __INTRIN_INLINE unsigned int __readdr(unsigned int reg)
1903 {
1904 	unsigned int value;
1905 	switch (reg)
1906 	{
1907 		case 0:
1908 			__asm__ __volatile__("mov %%dr0, %[value]" : [value] "=r" (value));
1909 			break;
1910 		case 1:
1911 			__asm__ __volatile__("mov %%dr1, %[value]" : [value] "=r" (value));
1912 			break;
1913 		case 2:
1914 			__asm__ __volatile__("mov %%dr2, %[value]" : [value] "=r" (value));
1915 			break;
1916 		case 3:
1917 			__asm__ __volatile__("mov %%dr3, %[value]" : [value] "=r" (value));
1918 			break;
1919 		case 4:
1920 			__asm__ __volatile__("mov %%dr4, %[value]" : [value] "=r" (value));
1921 			break;
1922 		case 5:
1923 			__asm__ __volatile__("mov %%dr5, %[value]" : [value] "=r" (value));
1924 			break;
1925 		case 6:
1926 			__asm__ __volatile__("mov %%dr6, %[value]" : [value] "=r" (value));
1927 			break;
1928 		case 7:
1929 			__asm__ __volatile__("mov %%dr7, %[value]" : [value] "=r" (value));
1930 			break;
1931 	}
1932 	return value;
1933 }
1934 
1935 __INTRIN_INLINE void __writedr(unsigned reg, unsigned int value)
1936 {
1937 	switch (reg)
1938 	{
1939 		case 0:
1940 			__asm__("mov %[value], %%dr0" : : [value] "r" (value) : "memory");
1941 			break;
1942 		case 1:
1943 			__asm__("mov %[value], %%dr1" : : [value] "r" (value) : "memory");
1944 			break;
1945 		case 2:
1946 			__asm__("mov %[value], %%dr2" : : [value] "r" (value) : "memory");
1947 			break;
1948 		case 3:
1949 			__asm__("mov %[value], %%dr3" : : [value] "r" (value) : "memory");
1950 			break;
1951 		case 4:
1952 			__asm__("mov %[value], %%dr4" : : [value] "r" (value) : "memory");
1953 			break;
1954 		case 5:
1955 			__asm__("mov %[value], %%dr5" : : [value] "r" (value) : "memory");
1956 			break;
1957 		case 6:
1958 			__asm__("mov %[value], %%dr6" : : [value] "r" (value) : "memory");
1959 			break;
1960 		case 7:
1961 			__asm__("mov %[value], %%dr7" : : [value] "r" (value) : "memory");
1962 			break;
1963 	}
1964 }
1965 
1966 #endif /* __x86_64__ */
1967 
1968 __INTRIN_INLINE void __invlpg(void *Address)
1969 {
1970 	__asm__ __volatile__ ("invlpg (%[Address])" : : [Address] "b" (Address) : "memory");
1971 }
1972 
1973 
1974 /*** System operations ***/
1975 
1976 __INTRIN_INLINE unsigned long long __readmsr(unsigned long reg)
1977 {
1978 #ifdef __x86_64__
1979 	unsigned long low, high;
1980 	__asm__ __volatile__("rdmsr" : "=a" (low), "=d" (high) : "c" (reg));
1981 	return ((unsigned long long)high << 32) | low;
1982 #else
1983 	unsigned long long retval;
1984 	__asm__ __volatile__("rdmsr" : "=A" (retval) : "c" (reg));
1985 	return retval;
1986 #endif
1987 }
1988 
1989 __INTRIN_INLINE void __writemsr(unsigned long Register, unsigned long long Value)
1990 {
1991 #ifdef __x86_64__
1992 	__asm__ __volatile__("wrmsr" : : "a" (Value), "d" (Value >> 32), "c" (Register));
1993 #else
1994 	__asm__ __volatile__("wrmsr" : : "A" (Value), "c" (Register));
1995 #endif
1996 }
1997 
1998 __INTRIN_INLINE unsigned long long __readpmc(unsigned long counter)
1999 {
2000 	unsigned long long retval;
2001 	__asm__ __volatile__("rdpmc" : "=A" (retval) : "c" (counter));
2002 	return retval;
2003 }
2004 
2005 /* NOTE: an immediate value for 'a' will raise an ICE in Visual C++ */
2006 __INTRIN_INLINE unsigned long __segmentlimit(unsigned long a)
2007 {
2008 	unsigned long retval;
2009 	__asm__ __volatile__("lsl %[a], %[retval]" : [retval] "=r" (retval) : [a] "rm" (a));
2010 	return retval;
2011 }
2012 
2013 __INTRIN_INLINE void __wbinvd(void)
2014 {
2015 	__asm__ __volatile__("wbinvd" : : : "memory");
2016 }
2017 
2018 __INTRIN_INLINE void __lidt(void *Source)
2019 {
2020 	__asm__ __volatile__("lidt %0" : : "m"(*(short*)Source));
2021 }
2022 
2023 __INTRIN_INLINE void __sidt(void *Destination)
2024 {
2025 	__asm__ __volatile__("sidt %0" : : "m"(*(short*)Destination) : "memory");
2026 }
2027 
2028 __INTRIN_INLINE void _sgdt(void *Destination)
2029 {
2030 	__asm__ __volatile__("sgdt %0" : : "m"(*(short*)Destination) : "memory");
2031 }
2032 
2033 /*** Misc operations ***/
2034 
2035 #if !HAS_BUILTIN(_mm_pause)
2036 __INTRIN_INLINE void _mm_pause(void)
2037 {
2038 	__asm__ __volatile__("pause" : : : "memory");
2039 }
2040 #endif
2041 
2042 __INTRIN_INLINE void __nop(void)
2043 {
2044 	__asm__ __volatile__("nop");
2045 }
2046 
2047 #ifdef __cplusplus
2048 }
2049 #endif
2050 
2051 #endif /* KJK_INTRIN_X86_H_ */
2052 
2053 /* EOF */
2054