/* MACHDEP.H    (c) Copyright Greg Smith, 2001-2010                  */
/*              Hercules machine specific code                       */

/*-------------------------------------------------------------------*/
/*                                                                   */
/* This header file contains the following functions, defined as     */
/* either normal unoptimized C code, or else as hand-tuned optimized */
/* assembler-assisted functions for the given machine architecture:  */
/*                                                                   */
/*                                                                   */
/*   Atomic COMPARE-AND-EXCHANGE functions:                          */
/*                                                                   */
/*       cmpxchg1, cmpxchg4, cmpxchg8, cmpxchg16                     */
/*                                                                   */
/*                                                                   */
/*   Atomic word/double-word FETCH/STORE functions:                  */
/*                                                                   */
/*       fetch_hw, fetch_hw_noswap, store_hw, store_hw_noswap        */
/*       fetch_fw, fetch_fw_noswap, store_fw, store_fw_noswap        */
/*       fetch_dw, fetch_dw_noswap, store_dw, store_dw_noswap        */
/*                                                                   */
/*     64-bit architectures would normally not need to specify       */
/*     any of the fetch_ or store_ macros.                           */
/*                                                                   */
/*     32-bit architectures should specify one of the `fetch_dw'     */
/*     and `store_dw' macros.  Little-endian machines should specify */
/*     the `noswap' macros.  Big-endian machines can specify either, */
/*     both being the same.                                          */
/*                                                                   */
/*     (An illustrative usage sketch follows this comment block.)    */
/*                                                                   */
/*-------------------------------------------------------------------*/
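
/*-------------------------------------------------------------------
 * Illustrative usage sketch: a typical caller retries the
 * compare-and-swap until it succeeds.  For example, to atomically
 * increment a big-endian fullword at a (hypothetical) location `ptr':
 *
 *     U32  old, new;
 *     old = fetch_fw_noswap( ptr );         // current raw value
 *     do
 *         new = CSWAP32( CSWAP32( old ) + 1 );
 *     while ( cmpxchg4( &old, new, ptr ) != 0 );
 *
 * cmpxchg4 returns 0 if the swap succeeded; otherwise it returns 1
 * and refreshes `old' with the current value, and the loop simply
 * recomputes and retries.
 *-------------------------------------------------------------------*/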

#ifndef _HERCULES_MACHDEP_H
#define _HERCULES_MACHDEP_H 1

#include "opcode.h"         // (need CSWAP32, et al. macros, etc)
#include "htypes.h"         // (need Hercules fixed-size data types)

/*-------------------------------------------------------------------
 * Microsoft Visual C/C++...
 *-------------------------------------------------------------------*/
#if defined( _MSVC_ )

  // PROGRAMMING NOTE: Optimizations normally only apply for release
  // builds, but we support optionally enabling them for debug too,
  // as well as purposely DISABLING them for troubleshooting...

     #define  OPTION_ENABLE_MSVC_OPTIMIZATIONS_FOR_DEBUG_BUILDS_TOO
  // #define  OPTION_DISABLE_MSVC_OPTIMIZATIONS

  #undef GEN_MSC_ASSISTS

  #if defined( DEBUG ) || defined( _DEBUG )
    #if defined(OPTION_ENABLE_MSVC_OPTIMIZATIONS_FOR_DEBUG_BUILDS_TOO) && \
       !defined(OPTION_DISABLE_MSVC_OPTIMIZATIONS)
      #define GEN_MSC_ASSISTS
    #endif
  #else // (presumed RELEASE build)
    #if !defined(OPTION_DISABLE_MSVC_OPTIMIZATIONS)
      #define GEN_MSC_ASSISTS
    #endif
  #endif // (debug or release)

  #undef MSC_X86_32BIT        // any 32-bit X86  (Pentium Pro, Pentium II, Pentium III or better)
  #undef MSC_X86_64BIT        // any 64-bit X86  (AMD64 or Intel Itanium)
  #undef MSC_X86_AMD64        // AMD64 only
  #undef MSC_X86_IA64         // Intel Itanium only

  #if defined( _M_IX86 ) && ( _M_IX86 >= 600 )
    #define MSC_X86_32BIT
  #endif
  #if defined( _M_AMD64 )
    #define MSC_X86_AMD64
    #define MSC_X86_64BIT
  #endif
  #if defined( _M_IA64 )
    #define MSC_X86_IA64
    #define MSC_X86_64BIT
  #endif

  #if defined(GEN_MSC_ASSISTS) && (defined(MSC_X86_32BIT) || defined(MSC_X86_64BIT))

    // Any X86 at all (both 32/64-bit)

    #pragma  intrinsic  ( _InterlockedCompareExchange )

    #define  cmpxchg1(  x, y, z )  cmpxchg1_x86( x, y, z )
    #define  cmpxchg4(  x, y, z )  cmpxchg4_x86( x, y, z )
    #define  cmpxchg8(  x, y, z )  cmpxchg8_x86( x, y, z )

    #if ( _MSC_VER < 1400 )

      // PROGRAMMING NOTE: compiler versions earlier than VS8 2005
      // do not have the _InterlockedCompareExchange64 intrinsic so
      // we use our own hand-coded inline assembler routine instead.
      // Also note that we can't use __fastcall here since doing so
      // would interfere with our register usage.

      static __inline BYTE cmpxchg8_x86 ( U64 *pOldVal, U64 u64NewVal, volatile void *pTarget )
      {
          // returns 0 == success, 1 otherwise
          BYTE  rc;
          U32   u32NewValHigh = u64NewVal >> 32;
          U32   u32NewValLow  = u64NewVal & 0xffffffff;
          __asm
          {
              mov    esi, [pOldVal]
              mov    eax, [esi + 0]
              mov    edx, [esi + 4]
              mov    ebx, [u32NewValLow]
              mov    ecx, [u32NewValHigh]
              mov    esi, [pTarget]
      #ifdef  OPTION_SMP
         lock cmpxchg8b  qword ptr [esi]
      #else
              cmpxchg8b  qword ptr [esi]
      #endif
              setne  rc
              jz     success
              mov    esi, [pOldVal]
              mov    [esi + 0], eax
              mov    [esi + 4], edx
          };
      success:
          return rc;
      }

    #else // ( _MSC_VER >= 1400 )

      #pragma intrinsic ( _InterlockedCompareExchange64 )

      static __inline BYTE __fastcall cmpxchg8_x86 ( U64 *old, U64 new, volatile void *ptr )
      {
          // returns 0 == success, 1 otherwise
          U64 tmp = *old;
          *old = _InterlockedCompareExchange64( ptr, new, *old );
          return ((tmp == *old) ? 0 : 1);
      }

    #endif // ( _MSC_VER >= 1400 )

    static __inline BYTE __fastcall cmpxchg4_x86 ( U32 *old, U32 new, volatile void *ptr )
    {
        // returns 0 == success, 1 otherwise
        U32 tmp = *old;
        *old = _InterlockedCompareExchange( ptr, new, *old );
        return ((tmp == *old) ? 0 : 1);
    }

    // (must follow cmpxchg4 since it uses it)
    static __inline BYTE __fastcall cmpxchg1_x86 ( BYTE *old, BYTE new, volatile void *ptr )
    {
        // returns 0 == success, 1 otherwise

        LONG_PTR  off, shift;
        BYTE  cc;
        U32  *ptr4, val4, old4, new4;

        off   = (LONG_PTR)ptr & 3;
        shift = (3 - off) * 8;
        ptr4  = (U32*)(((BYTE*)ptr) - off);
        val4  = CSWAP32(*ptr4);

        old4  = CSWAP32((val4 & ~(0xff << shift)) | (*old << shift));
        new4  = CSWAP32((val4 & ~(0xff << shift)) | ( new << shift));

        cc    = cmpxchg4( &old4, new4, ptr4 );

        *old  = (CSWAP32(old4) >> shift) & 0xff;

        return cc;
    }

    #if defined(MSC_X86_32BIT)

      #define fetch_dw_noswap(_p) fetch_dw_x86_noswap((_p))
      // (must follow cmpxchg8 since it uses it)
      static __inline U64 __fastcall fetch_dw_x86_noswap ( volatile void *ptr )
      {
        U64 value = *(U64*)ptr;
        cmpxchg8( &value, value, (U64*)ptr );
        return value;
      }

      #define store_dw_noswap(_p, _v) store_dw_x86_noswap( (_p), (_v))
      // (must follow cmpxchg8 since it uses it)
      static __inline void __fastcall store_dw_x86_noswap ( volatile void *ptr, U64 value )
      {
        U64 orig = *(U64*)ptr;
        while ( cmpxchg8( &orig, value, (U64*)ptr ) );
      }
    #endif /* defined(MSC_X86_32BIT) */

  #endif // defined(GEN_MSC_ASSISTS) && (defined(MSC_X86_32BIT) || defined(MSC_X86_64BIT))

  // ------------------------------------------------------------------

  #if defined(GEN_MSC_ASSISTS) && defined(MSC_X86_IA64)

    // (64-bit Itanium assists only)

    // ZZ FIXME: we should probably use the 'cmpxchg16b' instruction here
    // instead if the processor supports it (CPUID instruction w/EAX function
    // code 1 == Feature Information --> ECX bit 13 = CMPXCHG16B available)
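    //
    // (Illustrative sketch only, assuming the MSVC __cpuid intrinsic
    //  from <intrin.h> is available; the variable names below are
    //  hypothetical.  The feature test described above could look
    //  something like:
    //
    //      int cpuInfo[4];                        // EAX,EBX,ECX,EDX
    //      __cpuid( cpuInfo, 1 );                 // EAX function code 1
    //      int have_cmpxchg16b = (cpuInfo[2] >> 13) & 1;  // ECX bit 13
    //  )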

    #pragma  intrinsic  ( _AcquireSpinLock )
    #pragma  intrinsic  ( _ReleaseSpinLock )
    #pragma  intrinsic  ( _ReadWriteBarrier )

    #define  cmpxchg16(     x1, x2, y1, y2, z ) \
             cmpxchg16_x86( x1, x2, y1, y2, z )

    static __inline int __fastcall cmpxchg16_x86 ( U64 *old1, U64 *old2,
                                                   U64  new1, U64  new2,
                                                   volatile void  *ptr )
    {
        // returns 0 == success, 1 otherwise

        static unsigned __int64 lock = 0;
        int code;

        _AcquireSpinLock( &lock );

        _ReadWriteBarrier();

        if (*old1 == *(U64*)ptr && *old2 == *((U64*)ptr + 1))
        {
            *(U64*)ptr = new1;
            *((U64*)ptr + 1) = new2;
            code = 0;
        }
        else
        {
            *old1 = *((U64*)ptr);
            *old2 = *((U64*)ptr + 1);
            code = 1;
        }

        _ReleaseSpinLock( &lock );

        return code;
    }

  #endif // defined(GEN_MSC_ASSISTS) && defined(MSC_X86_IA64)

#else // !defined( _MSVC_ )
/*-------------------------------------------------------------------
 * GNU C or other compiler...   (i.e. NON-Microsoft C/C++)
 *-------------------------------------------------------------------*/
  #if defined(__i686__) || defined(__pentiumpro__) || \
      defined(__pentium4__) || defined(__athlon__) || \
      defined(__athlon)
    #define _ext_ia32
  #endif

  #if defined(__amd64__)
    #define _ext_amd64
  #endif

  #if defined(__powerpc__) || defined(__ppc__) || \
      defined(__POWERPC__) || defined(__PPC__) || \
      defined(_POWER)
    #define _ext_ppc
  #endif

/*-------------------------------------------------------------------
 * Intel pentiumpro/i686
 *-------------------------------------------------------------------*/
#if defined(_ext_ia32)

#undef LOCK_PREFIX
#ifdef OPTION_SMP
#define LOCK_PREFIX "lock\n\t"
#else
#define LOCK_PREFIX ""
#endif

    /*
     * If PIC is defined then ebx is used as the `thunk' register.
     * However, cmpxchg8b requires ebx, so in that case we load the
     * value into esi and then exchange esi and ebx before and after
     * the cmpxchg8b instruction.
     */
#undef BREG
#undef XCHG_BREG
#if defined(PIC) && !defined(__CYGWIN__)
#define BREG "S"
#define XCHG_BREG "xchgl   %%ebx,%%esi\n\t"
#else
#define BREG "b"
#define XCHG_BREG ""
#endif

#define cmpxchg1(x,y,z) cmpxchg1_i686(x,y,z)
static __inline__ BYTE cmpxchg1_i686(BYTE *old, BYTE new, void *ptr) {
 BYTE code;
 __asm__ __volatile__ (
         LOCK_PREFIX
         "cmpxchgb %b3,%4\n\t"
         "setnz   %b0"
         : "=q"(code), "=a"(*old)
         : "1" (*old),
           "q" (new),
           "m" (*(BYTE *)ptr)
         : "cc" );
 return code;
}

#define cmpxchg4(x,y,z) cmpxchg4_i686(x,y,z)
static __inline__ BYTE cmpxchg4_i686(U32 *old, U32 new, void *ptr) {
 BYTE code;
 __asm__ __volatile__ (
         LOCK_PREFIX
         "cmpxchgl %3,%4\n\t"
         "setnz   %b0"
         : "=q"(code), "=a"(*old)
         : "1" (*old),
           "q" (new),
           "m" (*(U32 *)ptr)
         : "cc" );
 return code;
}

#define cmpxchg8(x,y,z) cmpxchg8_i686(x,y,z)
static __inline__ BYTE cmpxchg8_i686(U64 *old, U64 new, void *ptr) {
 BYTE code;
__asm__ __volatile__ (
         XCHG_BREG
         LOCK_PREFIX
         "cmpxchg8b %5\n\t"
         XCHG_BREG
         "setnz   %b0"
         : "=q"(code), "=A"(*old)
         : "1" (*old),
           BREG ((unsigned long)new),
           "c"  ((unsigned long)(new >> 32)),
           "m" (*(U64 *)ptr)
         : "cc");
 return code;
}

#define fetch_dw_noswap(x) fetch_dw_i686_noswap(x)
static __inline__ U64 fetch_dw_i686_noswap(void *ptr)
{
 U64 value = *(U64 *)ptr;
__asm__ __volatile__ (
         XCHG_BREG
         LOCK_PREFIX
         "cmpxchg8b (%4)\n\t"
         XCHG_BREG
         : "=A" (value)
         : "0" (value),
           BREG ((unsigned long)value),
           "c"  ((unsigned long)(value >> 32)),
           "D" (ptr));
 return value;
}

#define store_dw_noswap(x,y) store_dw_i686_noswap(x,y)
static __inline__ void store_dw_i686_noswap(void *ptr, U64 value) {
__asm__ __volatile__ (
         XCHG_BREG
         "1:\t"
         LOCK_PREFIX
         "cmpxchg8b %3\n\t"
         "jne     1b\n\t"
         XCHG_BREG
         :
         : "A" (*(U64 *)ptr),
           BREG ((unsigned long)value),
           "c"  ((unsigned long)(value >> 32)),
           "m" (*(U64 *)ptr));
}

#if defined(OPTION_MULTI_BYTE_ASSIST) && defined(__linux__)
#define MULTI_BYTE_ASSIST
#define MULTI_BYTE_ASSIST_IA32
#endif

#endif /* defined(_ext_ia32) */

/*-------------------------------------------------------------------
 * AMD64
 *-------------------------------------------------------------------*/
#if defined(_ext_amd64)

#define cmpxchg1(x,y,z) cmpxchg1_amd64(x,y,z)
static __inline__ BYTE cmpxchg1_amd64(BYTE *old, BYTE new, void *ptr) {
/* returns zero on success otherwise returns 1 */
 BYTE code;
 BYTE *ptr_data=ptr;
 __asm__ __volatile__ (
         "lock;   cmpxchgb %b2,%4\n\t"
         "setnz   %b0\n\t"
         : "=q"(code), "=a"(*old)
         : "q"(new),
           "1"(*old),
           "m"(*ptr_data)
         : "cc");
 return code;
}

#define cmpxchg4(x,y,z) cmpxchg4_amd64(x,y,z)
static __inline__ BYTE cmpxchg4_amd64(U32 *old, U32 new, void *ptr) {
/* returns zero on success otherwise returns 1 */
 BYTE code;
 U32 *ptr_data=ptr;
 __asm__ __volatile__ (
         "lock;   cmpxchgl %2,%4\n\t"
         "setnz   %b0\n\t"
         : "=q"(code), "=a"(*old)
         : "q"(new),
           "1"(*old),
           "m"(*ptr_data)
         : "cc");
 return code;
}

#define cmpxchg8(x,y,z) cmpxchg8_amd64(x,y,z)
static __inline__ BYTE cmpxchg8_amd64(U64 *old, U64 new, void *ptr) {
/* returns zero on success otherwise returns 1 */
 BYTE code;
 U64 *ptr_data=ptr;
 __asm__ __volatile__ (
         "lock;   cmpxchgq %2,%4\n\t"
         "setnz   %b0\n\t"
         : "=q"(code), "=a"(*old)
         : "q"(new),
           "1"(*old),
           "m"(*ptr_data)
         : "cc");
 return code;
}

#endif /* defined(_ext_amd64) */

/*-------------------------------------------------------------------
 * PowerPC
 *-------------------------------------------------------------------*/
#if defined(_ext_ppc)

/* From /usr/src/linux/include/asm-ppc/system.h */

/* NOTE: IBM's VisualAge compiler likes 1: style labels
         but GNU's gcc compiler running on AIX does not. */

#if !defined( __GNUC__ )        // (VisualAge presumed)
  #define LABEL1 "1:\n"
  #define LABEL2 "2:\n"
  #define BRNCH2 "2f"
  #define BRNCH1 "1b"
#else                           // (else gcc...)
  #define LABEL1 "loop%=:\n"
  #define LABEL2 "exit%=:\n"
  #define BRNCH2 "exit%="
  #define BRNCH1 "loop%="
#endif

/* NOTE: Both VisualAge *and* gcc define __64BIT__
         see: http://gmplib.org/list-archives/gmp-discuss/2008-July/003339.html */

#if defined( __64BIT__ )

static __inline__ U64
__cmpxchg_u64(volatile U64 *p, U64 old, U64 new)
{
    U64 prev;

    __asm__ __volatile__ ("\n"
LABEL1
"       ldarx   %0,0,%2\n\
        cmpd    0,%0,%3\n\
        bne     "BRNCH2"\n\
        stdcx.  %4,0,%2\n\
        bne-    "BRNCH1"\n"
#ifdef OPTION_SMP
"       sync\n"
#endif /* OPTION_SMP */
LABEL2
    : "=&r" (prev), "=m" (*p)
    : "r" (p), "r" (old), "r" (new), "m" (*p)
    : "cc", "memory");

    return prev;
}

#define cmpxchg8(x,y,z) cmpxchg8_ppc(x,y,z)
static __inline__ BYTE cmpxchg8_ppc(U64 *old, U64 new, void *ptr) {
/* returns zero on success otherwise returns 1 */
U64 prev = *old;
return (prev != (*old = __cmpxchg_u64((U64*)ptr, prev, new)));
}

#endif // defined( __64BIT__ )

static __inline__ U32
__cmpxchg_u32(volatile U32 *p, U32 old, U32 new)
{
    U32 prev;

    __asm__ __volatile__ ("\n"
LABEL1
"       lwarx   %0,0,%2\n\
        cmpw    0,%0,%3\n\
        bne     "BRNCH2"\n\
        stwcx.  %4,0,%2\n\
        bne-    "BRNCH1"\n"
#ifdef OPTION_SMP
"       sync\n"
#endif /* OPTION_SMP */
LABEL2
    : "=&r" (prev), "=m" (*p)
    : "r" (p), "r" (old), "r" (new), "m" (*p)
    : "cc", "memory");

    return prev;
}

#define cmpxchg4(x,y,z) cmpxchg4_ppc(x,y,z)
static __inline__ BYTE cmpxchg4_ppc(U32 *old, U32 new, void *ptr) {
/* returns zero on success otherwise returns 1 */
U32 prev = *old;
return (prev != (*old = __cmpxchg_u32((U32*)ptr, prev, new)));
}

#define cmpxchg1(x,y,z) cmpxchg1_ppc(x,y,z)
static __inline__ BYTE cmpxchg1_ppc(BYTE *old, BYTE new, void *ptr) {
/* returns zero on success otherwise returns 1 */
long  off, shift;
BYTE  cc;
U32  *ptr4, val4, old4, new4;

    off = (long)ptr & 3;
    shift = (3 - off) * 8;
    ptr4 = ptr - off;
    val4 = *ptr4;
    old4 = (val4 & ~(0xff << shift)) | (*old << shift);
    new4 = (val4 & ~(0xff << shift)) | (new << shift);
    cc = cmpxchg4_ppc(&old4, new4, ptr4);
    *old = (old4 >> shift) & 0xff;
    return cc;
}

#endif /* defined(_ext_ppc) */

#endif // !defined( _MSVC_ )

/*-------------------------------------------------------------------
 * Define the ASSIST_ macros
 *-------------------------------------------------------------------*/
#if defined(cmpxchg1)
 #define ASSIST_CMPXCHG1
#endif

#if defined(cmpxchg4)
 #define ASSIST_CMPXCHG4
#endif

#if defined(cmpxchg8)
 #define ASSIST_CMPXCHG8
#endif

#if defined(cmpxchg16)
 #define ASSIST_CMPXCHG16
#endif

#if defined(fetch_dw) || defined(fetch_dw_noswap)
 #define ASSIST_FETCH_DW
#endif

#if defined(store_dw) || defined(store_dw_noswap)
 #define ASSIST_STORE_DW
#endif

/*-------------------------------------------------------------------
 * Activate OPTION_STRICT_ALIGNMENT to generate different paths
 * for non-aligned and aligned accesses in certain instructions
 *-------------------------------------------------------------------*/
#define OPTION_STRICT_ALIGNMENT

/*-------------------------------------------------------------------
 * fetch_hw_noswap and fetch_hw
 *-------------------------------------------------------------------*/
#if !defined(fetch_hw_noswap)
  #if defined(fetch_hw)
    #define fetch_hw_noswap(_p) CSWAP16(fetch_hw((_p)))
  #else
    #if !defined(OPTION_STRICT_ALIGNMENT)
      static __inline__ U16 fetch_hw_noswap(void *ptr) {
        return *(U16 *)ptr;
      }
    #else
      static __inline__ U16 fetch_hw_noswap(void *ptr) {
        U16 value;
        memcpy(&value, (BYTE *)ptr, 2);
        return value;
      }
    #endif
  #endif
#endif
#if !defined(fetch_hw)
  #define fetch_hw(_p) CSWAP16(fetch_hw_noswap((_p)))
#endif

/*-------------------------------------------------------------------
 * store_hw_noswap and store_hw
 *-------------------------------------------------------------------*/
#if !defined(store_hw_noswap)
  #if defined(store_hw)
    #define store_hw_noswap(_p, _v) store_hw((_p), CSWAP16(_v))
  #else
    #if !defined(OPTION_STRICT_ALIGNMENT)
      static __inline__ void store_hw_noswap(void *ptr, U16 value) {
        *(U16 *)ptr = value;
      }
    #else
      static __inline__ void store_hw_noswap(void *ptr, U16 value) {
        memcpy((BYTE *)ptr, (BYTE *)&value, 2);
      }
    #endif
  #endif
#endif
#if !defined(store_hw)
  #define store_hw(_p, _v) store_hw_noswap((_p), CSWAP16((_v)))
#endif

/*-------------------------------------------------------------------
 * fetch_fw_noswap and fetch_fw
 *-------------------------------------------------------------------*/
#if !defined(fetch_fw_noswap)
  #if defined(fetch_fw)
    #define fetch_fw_noswap(_p) CSWAP32(fetch_fw((_p)))
  #else
    #if !defined(OPTION_STRICT_ALIGNMENT)
      static __inline__ U32 fetch_fw_noswap(const void *ptr) {
        return *(U32 *)ptr;
      }
    #else
      static __inline__ U32 fetch_fw_noswap(const void *ptr) {
        U32 value;
        memcpy(&value, (BYTE *)ptr, 4);
        return value;
      }
    #endif
  #endif
#endif
#if !defined(fetch_fw)
  #define fetch_fw(_p) CSWAP32(fetch_fw_noswap((_p)))
#endif

/*-------------------------------------------------------------------
 * store_fw_noswap and store_fw
 *-------------------------------------------------------------------*/
#if !defined(store_fw_noswap)
  #if defined(store_fw)
    #define store_fw_noswap(_p, _v) store_fw((_p), CSWAP32(_v))
  #else
    #if !defined(OPTION_STRICT_ALIGNMENT)
      static __inline__ void store_fw_noswap(void *ptr, U32 value) {
        *(U32 *)ptr = value;
      }
    #else
      static __inline__ void store_fw_noswap(void *ptr, U32 value) {
        memcpy((BYTE *)ptr, (BYTE *)&value, 4);
      }
    #endif
  #endif
#endif
#if !defined(store_fw)
  #define store_fw(_p, _v) store_fw_noswap((_p), CSWAP32((_v)))
#endif

/*-------------------------------------------------------------------
 * fetch_dw_noswap and fetch_dw
 *-------------------------------------------------------------------*/
#if !defined(fetch_dw_noswap)
  #if defined(fetch_dw)
    #define fetch_dw_noswap(_p) CSWAP64(fetch_dw((_p)))
  #else
    #if !defined(OPTION_STRICT_ALIGNMENT)
      static __inline__ U64 fetch_dw_noswap(void *ptr) {
        return *(U64 *)ptr;
      }
    #else
      static __inline__ U64 fetch_dw_noswap(void *ptr) {
        U64 value;
        memcpy(&value, (BYTE *)ptr, 8);
        return value;
      }
    #endif
  #endif
#endif
#if !defined(fetch_dw)
  #define fetch_dw(_p) CSWAP64(fetch_dw_noswap((_p)))
#endif

/*-------------------------------------------------------------------
 * store_dw_noswap and store_dw
 *-------------------------------------------------------------------*/
#if !defined(store_dw_noswap)
  #if defined(store_dw)
    #define store_dw_noswap(_p, _v) store_dw((_p), CSWAP64(_v))
  #else
    #if !defined(OPTION_STRICT_ALIGNMENT)
      static __inline__ void store_dw_noswap(void *ptr, U64 value) {
        *(U64 *)ptr = value;
      }
    #else
      static __inline__ void store_dw_noswap(void *ptr, U64 value) {
        memcpy((BYTE *)ptr, (BYTE *)&value, 8);
      }
    #endif
  #endif
#endif
#if !defined(store_dw)
  #define store_dw(_p, _v) store_dw_noswap((_p), CSWAP64((_v)))
#endif

/*-------------------------------------------------------------------
 * cmpxchg1
 *-------------------------------------------------------------------*/
#ifndef cmpxchg1
static __inline__ BYTE cmpxchg1(BYTE *old, BYTE new, volatile void *ptr) {
 BYTE code;
 if (*old == *(BYTE *)ptr)
 {
     *(BYTE *)ptr = new;
     code = 0;
 }
 else
 {
     *old = *(BYTE *)ptr;
     code = 1;
 }
 return code;
}
#endif

/*-------------------------------------------------------------------
 * cmpxchg4
 *-------------------------------------------------------------------*/
#ifndef cmpxchg4
static __inline__ BYTE cmpxchg4(U32 *old, U32 new, volatile void *ptr) {
 BYTE code;
 if (*old == *(U32 *)ptr)
 {
     *(U32 *)ptr = new;
     code = 0;
 }
 else
 {
     *old = *(U32 *)ptr;
     code = 1;
 }
 return code;
}
#endif

/*-------------------------------------------------------------------
 * cmpxchg8
 *-------------------------------------------------------------------*/
#ifndef cmpxchg8
static __inline__ BYTE cmpxchg8(U64 *old, U64 new, volatile void *ptr) {
 BYTE code;
 if (*old == *(U64 *)ptr)
 {
     *(U64 *)ptr = new;
     code = 0;
 }
 else
 {
     *old = *(U64 *)ptr;
     code = 1;
 }
 return code;
}
#endif

/*-------------------------------------------------------------------
 * cmpxchg16
 *-------------------------------------------------------------------*/
#ifndef cmpxchg16
static __inline__ int cmpxchg16(U64 *old1, U64 *old2, U64 new1, U64 new2, volatile void *ptr) {
 int code;
 if (*old1 == *(U64 *)ptr && *old2 == *((U64 *)ptr + 1))
 {
     *(U64 *)ptr = new1;
     *((U64 *)ptr + 1) = new2;
     code = 0;
 }
 else
 {
     *old1 = *((U64 *)ptr);
     *old2 = *((U64 *)ptr + 1);
     code = 1;
 }
 return code;
}
#endif

#ifndef BIT
#define BIT(nr) (1<<(nr))
#endif

#endif /* _HERCULES_MACHDEP_H */