/*
 * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "../all_aligned_atomic_load_store.h"

/* Real X86 implementations appear                                      */
/* to enforce ordering between memory operations, EXCEPT that a later   */
/* read can pass earlier writes, presumably due to the visible          */
/* presence of store buffers.                                           */
/* We ignore the fact that the official specs                           */
/* seem to be much weaker (and arguably too weak to be usable).         */
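
/*
 * Illustrative note (not part of the original header): the one
 * reordering the hardware does permit is a later load passing an
 * earlier store by the same CPU.  With x and y both initially 0, the
 * classic example below may end with r1 == 0 && r2 == 0, because each
 * store may still sit in a store buffer when the other CPU's load
 * executes:
 *
 *   CPU 0: x = 1; r1 = y;            CPU 1: y = 1; r2 = x;
 *
 * Inserting AO_nop_full() (a full barrier; mfence below) between the
 * store and the load on each CPU rules that outcome out.
 */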

#include "../ordered_except_wr.h"

#ifdef AO_ASM_X64_AVAILABLE
# include "../test_and_set_t_is_char.h"
#else
# include "../test_and_set_t_is_ao_t.h"
#endif

#include <windows.h>
        /* Seems like overkill, but that's what MSDN recommends.        */
        /* And apparently winbase.h is not always self-contained.       */

/* Assume _MSC_VER >= 1400 */
#include <intrin.h>

#pragma intrinsic (_InterlockedExchangeAdd)
#pragma intrinsic (_InterlockedCompareExchange64)

#ifndef AO_PREFER_GENERALIZED

# pragma intrinsic (_InterlockedIncrement64)
# pragma intrinsic (_InterlockedDecrement64)
# pragma intrinsic (_InterlockedExchangeAdd64)

AO_INLINE AO_t
AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
{
  return _InterlockedExchangeAdd64((LONGLONG volatile *)p, (LONGLONG)incr);
}
#define AO_HAVE_fetch_and_add_full
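
/*
 * Illustrative use (not part of the original header): atomically bump a
 * shared counter and fetch the value it held before the increment:
 *   static volatile AO_t counter;
 *   AO_t prev = AO_fetch_and_add_full(&counter, 1);
 */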

AO_INLINE AO_t
AO_fetch_and_add1_full (volatile AO_t *p)
{
  /* _InterlockedIncrement64 returns the new value, so subtract 1 to    */
  /* recover the value held before the increment.                       */
  return _InterlockedIncrement64((LONGLONG volatile *)p) - 1;
}
#define AO_HAVE_fetch_and_add1_full

AO_INLINE AO_t
AO_fetch_and_sub1_full (volatile AO_t *p)
{
  /* Similarly, _InterlockedDecrement64 returns the new value.          */
  return _InterlockedDecrement64((LONGLONG volatile *)p) + 1;
}
#define AO_HAVE_fetch_and_sub1_full
#endif /* !AO_PREFER_GENERALIZED */

AO_INLINE AO_t
AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val,
                               AO_t new_val)
{
  return (AO_t)_InterlockedCompareExchange64((LONGLONG volatile *)addr,
                                        (LONGLONG)new_val, (LONGLONG)old_val);
}
#define AO_HAVE_fetch_compare_and_swap_full
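
/*
 * Illustrative use (not part of the original header): a typical
 * compare-and-swap loop built on this primitive, here adding 1 to a
 * shared volatile AO_t x (AO_load comes from the header included at
 * the top of this file):
 *   AO_t old;
 *   do {
 *     old = AO_load(&x);
 *   } while (AO_fetch_compare_and_swap_full(&x, old, old + 1) != old);
 */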

AO_INLINE unsigned int
AO_int_fetch_and_add_full(volatile unsigned int *p, unsigned int incr)
{
  return _InterlockedExchangeAdd((LONG volatile *)p, incr);
}
#define AO_HAVE_int_fetch_and_add_full

#ifdef AO_ASM_X64_AVAILABLE

  AO_INLINE unsigned char
  AO_char_fetch_and_add_full(volatile unsigned char *p, unsigned char incr)
  {
    __asm
    {
      mov al, incr
      mov rbx, p
      lock xadd byte ptr [rbx], al
    }
  }
# define AO_HAVE_char_fetch_and_add_full

  AO_INLINE unsigned short
  AO_short_fetch_and_add_full(volatile unsigned short *p, unsigned short incr)
  {
    __asm
    {
      mov ax, incr
      mov rbx, p
      lock xadd word ptr [rbx], ax
    }
  }
# define AO_HAVE_short_fetch_and_add_full

/* As far as we can tell, the lfence and sfence instructions are not    */
/* currently needed or useful for cached memory accesses.               */

  AO_INLINE void
  AO_nop_full(void)
  {
    /* Note: "mfence" (SSE2) is supported on all x86_64/amd64 chips.    */
    __asm { mfence }
  }
# define AO_HAVE_nop_full

  AO_INLINE AO_TS_VAL_t
  AO_test_and_set_full(volatile AO_TS_t *addr)
  {
    __asm
    {
        mov     rax,AO_TS_SET           ;
        mov     rbx,addr                ;
        xchg    byte ptr [rbx],al       ;
    }
  }
# define AO_HAVE_test_and_set_full
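
/*
 * Illustrative use (not part of the original header): a minimal spin
 * lock sketch on top of this primitive, assuming the usual
 * libatomic_ops helpers AO_TS_INITIALIZER and AO_CLEAR:
 *   static volatile AO_TS_t lock = AO_TS_INITIALIZER;
 *   while (AO_test_and_set_full(&lock) == AO_TS_SET)
 *     ;                              (spin, or yield)
 *   ...critical section...
 *   AO_CLEAR(&lock);                 (release the lock)
 */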

#endif /* AO_ASM_X64_AVAILABLE */

#ifdef AO_CMPXCHG16B_AVAILABLE
/* AO_compare_double_and_swap_double_full needs an implementation for   */
/* Win64.  Also see ../gcc/x86.h for a partial workaround for old       */
/* Opterons.                                                            */

# if _MSC_VER >= 1500

#   include "../standard_ao_double_t.h"

#   pragma intrinsic (_InterlockedCompareExchange128)

AO_INLINE int
AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
                                       AO_t old_val1, AO_t old_val2,
                                       AO_t new_val1, AO_t new_val2)
{
   __int64 comparandResult[2];
   comparandResult[0] = old_val1; /* low */
   comparandResult[1] = old_val2; /* high */
   return _InterlockedCompareExchange128((volatile __int64 *)addr,
                new_val2 /* high */, new_val1 /* low */, comparandResult);
}
#   define AO_HAVE_compare_double_and_swap_double_full
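
/*
 * Illustrative use (not part of the original header; the variable names
 * are hypothetical): replace the 16-byte pair in an AO_double_t only if
 * it still holds <old_lo, old_hi>; the call returns nonzero on success:
 *   if (AO_compare_double_and_swap_double_full(&pair, old_lo, old_hi,
 *                                              new_lo, new_hi)) {
 *     (the pair was replaced atomically)
 *   }
 */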

# elif defined(AO_ASM_X64_AVAILABLE)

#   include "../standard_ao_double_t.h"

    /* If there is no intrinsic _InterlockedCompareExchange128 then we  */
    /* need basically what's given below.                               */
AO_INLINE int
AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
                                       AO_t old_val1, AO_t old_val2,
                                       AO_t new_val1, AO_t new_val2)
{
        __asm
        {
                mov     rdx,QWORD PTR [old_val2]        ;
                mov     rax,QWORD PTR [old_val1]        ;
                mov     rcx,QWORD PTR [new_val2]        ;
                mov     rbx,QWORD PTR [new_val1]        ;
                lock cmpxchg16b [addr]                  ;
                /* ZF is set iff the exchange happened; widen it into   */
                /* eax, which carries the int return value.             */
                setz    al                              ;
                movzx   eax, al                         ;
        }
}
#   define AO_HAVE_compare_double_and_swap_double_full
# endif /* AO_ASM_X64_AVAILABLE && (_MSC_VER < 1500) */

#endif /* AO_CMPXCHG16B_AVAILABLE */