1 /*
2 * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "../all_aligned_atomic_load_store.h"
24
25 /* Real X86 implementations appear */
26 /* to enforce ordering between memory operations, EXCEPT that a later */
27 /* read can pass earlier writes, presumably due to the visible */
28 /* presence of store buffers. */
29 /* We ignore the fact that the official specs */
30 /* seem to be much weaker (and arguably too weak to be usable). */
31
32 #include "../ordered_except_wr.h"
33
34 #ifdef AO_ASM_X64_AVAILABLE
35 # include "../test_and_set_t_is_char.h"
36 #else
37 # include "../test_and_set_t_is_ao_t.h"
38 #endif
39
40 #include <windows.h>
41 /* Seems like over-kill, but that's what MSDN recommends. */
42 /* And apparently winbase.h is not always self-contained. */
43
44 /* Assume _MSC_VER >= 1400 */
45 #include <intrin.h>
46
47 #pragma intrinsic (_InterlockedExchangeAdd)
48 #pragma intrinsic (_InterlockedCompareExchange64)
49
50 #ifndef AO_PREFER_GENERALIZED
51
52 # pragma intrinsic (_InterlockedIncrement64)
53 # pragma intrinsic (_InterlockedDecrement64)
54 # pragma intrinsic (_InterlockedExchangeAdd64)
55
56 AO_INLINE AO_t
AO_fetch_and_add_full(volatile AO_t * p,AO_t incr)57 AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
58 {
59 return _InterlockedExchangeAdd64((LONGLONG volatile *)p, (LONGLONG)incr);
60 }
61 #define AO_HAVE_fetch_and_add_full
62
63 AO_INLINE AO_t
AO_fetch_and_add1_full(volatile AO_t * p)64 AO_fetch_and_add1_full (volatile AO_t *p)
65 {
66 return _InterlockedIncrement64((LONGLONG volatile *)p) - 1;
67 }
68 #define AO_HAVE_fetch_and_add1_full
69
70 AO_INLINE AO_t
AO_fetch_and_sub1_full(volatile AO_t * p)71 AO_fetch_and_sub1_full (volatile AO_t *p)
72 {
73 return _InterlockedDecrement64((LONGLONG volatile *)p) + 1;
74 }
75 #define AO_HAVE_fetch_and_sub1_full
76 #endif /* !AO_PREFER_GENERALIZED */
77
78 AO_INLINE AO_t
AO_fetch_compare_and_swap_full(volatile AO_t * addr,AO_t old_val,AO_t new_val)79 AO_fetch_compare_and_swap_full(volatile AO_t *addr, AO_t old_val,
80 AO_t new_val)
81 {
82 return (AO_t)_InterlockedCompareExchange64((LONGLONG volatile *)addr,
83 (LONGLONG)new_val, (LONGLONG)old_val);
84 }
85 #define AO_HAVE_fetch_compare_and_swap_full
86
87 AO_INLINE unsigned int
AO_int_fetch_and_add_full(volatile unsigned int * p,unsigned int incr)88 AO_int_fetch_and_add_full(volatile unsigned int *p, unsigned int incr)
89 {
90 return _InterlockedExchangeAdd((LONG volatile *)p, incr);
91 }
92 #define AO_HAVE_int_fetch_and_add_full
93
94 #ifdef AO_ASM_X64_AVAILABLE
95
/* Atomically add incr to the byte at *p, returning the old byte.    */
/* NOTE(review): there is no C return statement - the result is left */
/* in al by xadd, which serves as the unsigned char return value     */
/* under MSVC inline-asm conventions.  This block is guarded by      */
/* AO_ASM_X64_AVAILABLE; the standard MSVC x64 compiler does not     */
/* accept __asm blocks, so confirm the intended toolchain before     */
/* relying on it.                                                    */
AO_INLINE unsigned char
AO_char_fetch_and_add_full(volatile unsigned char *p, unsigned char incr)
{
  __asm
  {
    mov al, incr                  ; al = amount to add
    mov rbx, p                    ; rbx = target address
    lock xadd byte ptr [rbx], al  ; al = old *p, *p += incr (atomic)
  }
  /* Old value is returned in al (MSVC inline-asm convention). */
}
# define AO_HAVE_char_fetch_and_add_full
107
/* Atomically add incr to the 16-bit value at *p, returning the old  */
/* value.  NOTE(review): as with the char variant, the result is     */
/* left in ax and doubles as the return value under MSVC inline-asm  */
/* conventions - there is deliberately no C return statement.        */
AO_INLINE unsigned short
AO_short_fetch_and_add_full(volatile unsigned short *p, unsigned short incr)
{
  __asm
  {
    mov ax, incr                  ; ax = amount to add
    mov rbx, p                    ; rbx = target address
    lock xadd word ptr [rbx], ax  ; ax = old *p, *p += incr (atomic)
  }
  /* Old value is returned in ax (MSVC inline-asm convention). */
}
# define AO_HAVE_short_fetch_and_add_full
119
120 /* As far as we can tell, the lfence and sfence instructions are not */
121 /* currently needed or useful for cached memory accesses. */
122
123 AO_INLINE void
AO_nop_full(void)124 AO_nop_full(void)
125 {
126 /* Note: "mfence" (SSE2) is supported on all x86_64/amd64 chips. */
127 __asm { mfence }
128 }
129 # define AO_HAVE_nop_full
130
/* Atomically store AO_TS_SET into the byte at *addr and return the  */
/* previous contents.  xchg with a memory operand is implicitly      */
/* locked, so no lock prefix is needed.  NOTE(review): the old byte  */
/* is left in al, which doubles as the AO_TS_VAL_t return value      */
/* under MSVC inline-asm conventions (no C return statement).        */
AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t *addr)
{
  __asm
  {
    mov rax,AO_TS_SET ;
    mov rbx,addr ;
    xchg byte ptr [rbx],al ;
  }
}
# define AO_HAVE_test_and_set_full
142
143 #endif /* AO_ASM_X64_AVAILABLE */
144
145 #ifdef AO_CMPXCHG16B_AVAILABLE
146 /* AO_compare_double_and_swap_double_full needs implementation for Win64.
147 * Also see ../gcc/x86.h for partial old Opteron workaround.
148 */
149
150 # if _MSC_VER >= 1500
151
152 # include "../standard_ao_double_t.h"
153
154 # pragma intrinsic (_InterlockedCompareExchange128)
155
156 AO_INLINE int
AO_compare_double_and_swap_double_full(volatile AO_double_t * addr,AO_t old_val1,AO_t old_val2,AO_t new_val1,AO_t new_val2)157 AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
158 AO_t old_val1, AO_t old_val2,
159 AO_t new_val1, AO_t new_val2)
160 {
161 __int64 comparandResult[2];
162 comparandResult[0] = old_val1; /* low */
163 comparandResult[1] = old_val2; /* high */
164 return _InterlockedCompareExchange128((volatile __int64 *)addr,
165 new_val2 /* high */, new_val1 /* low */, comparandResult);
166 }
167 # define AO_HAVE_compare_double_and_swap_double_full
168
169 # elif defined(AO_ASM_X64_AVAILABLE)
170
171 # include "../standard_ao_double_t.h"
172
173 /* If there is no intrinsic _InterlockedCompareExchange128 then we */
174 /* need basically what's given below. */
175 AO_INLINE int
AO_compare_double_and_swap_double_full(volatile AO_double_t * addr,AO_t old_val1,AO_t old_val2,AO_t new_val1,AO_t new_val2)176 AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
177 AO_t old_val1, AO_t old_val2,
178 AO_t new_val1, AO_t new_val2)
179 {
180 __asm
181 {
182 mov rdx,QWORD PTR [old_val2] ;
183 mov rax,QWORD PTR [old_val1] ;
184 mov rcx,QWORD PTR [new_val2] ;
185 mov rbx,QWORD PTR [new_val1] ;
186 lock cmpxchg16b [addr] ;
187 setz rax ;
188 }
189 }
190 # define AO_HAVE_compare_double_and_swap_double_full
191 # endif /* AO_ASM_X64_AVAILABLE && (_MSC_VER < 1500) */
192
193 #endif /* AO_CMPXCHG16B_AVAILABLE */
194