1 /*
2  * Copyright (c) 2003 Hewlett-Packard Development Company, L.P.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22 
23 #include "../all_aligned_atomic_load_store.h"
24 
25 /* Real X86 implementations appear                                      */
26 /* to enforce ordering between memory operations, EXCEPT that a later   */
27 /* read can pass earlier writes, presumably due to the visible          */
28 /* presence of store buffers.                                           */
29 /* We ignore the fact that the official specs                           */
30 /* seem to be much weaker (and arguably too weak to be usable).         */
31 
32 #include "../ordered_except_wr.h"
33 
34 #ifdef AO_ASM_X64_AVAILABLE
35 # include "../test_and_set_t_is_char.h"
36 #else
37 # include "../test_and_set_t_is_ao_t.h"
38 #endif
39 
40 #include "../standard_ao_double_t.h"
41 
42 #include <windows.h>
43         /* Seems like over-kill, but that's what MSDN recommends.       */
44         /* And apparently winbase.h is not always self-contained.       */
45 
46 /* Assume _MSC_VER >= 1400 */
47 #include <intrin.h>
48 
49 #pragma intrinsic (_ReadWriteBarrier)
50 
51 #pragma intrinsic (_InterlockedIncrement64)
52 #pragma intrinsic (_InterlockedDecrement64)
53 #pragma intrinsic (_InterlockedExchange64)
54 #pragma intrinsic (_InterlockedExchangeAdd64)
55 #pragma intrinsic (_InterlockedCompareExchange64)
56 
57 AO_INLINE AO_t
AO_fetch_and_add_full(volatile AO_t * p,AO_t incr)58 AO_fetch_and_add_full (volatile AO_t *p, AO_t incr)
59 {
60   return _InterlockedExchangeAdd64((LONGLONG volatile *)p, (LONGLONG)incr);
61 }
62 
63 #define AO_HAVE_fetch_and_add_full
64 
65 AO_INLINE AO_t
AO_fetch_and_add1_full(volatile AO_t * p)66 AO_fetch_and_add1_full (volatile AO_t *p)
67 {
68   return _InterlockedIncrement64((LONGLONG volatile *)p) - 1;
69 }
70 
71 #define AO_HAVE_fetch_and_add1_full
72 
73 AO_INLINE AO_t
AO_fetch_and_sub1_full(volatile AO_t * p)74 AO_fetch_and_sub1_full (volatile AO_t *p)
75 {
76   return _InterlockedDecrement64((LONGLONG volatile *)p) + 1;
77 }
78 
79 #define AO_HAVE_fetch_and_sub1_full
80 
81 AO_INLINE int
AO_compare_and_swap_full(volatile AO_t * addr,AO_t old,AO_t new_val)82 AO_compare_and_swap_full(volatile AO_t *addr,
83                          AO_t old, AO_t new_val)
84 {
85     return _InterlockedCompareExchange64((LONGLONG volatile *)addr,
86                                          (LONGLONG)new_val, (LONGLONG)old)
87            == (LONGLONG)old;
88 }
89 
90 #define AO_HAVE_compare_and_swap_full
91 
92 /* As far as we can tell, the lfence and sfence instructions are not    */
93 /* currently needed or useful for cached memory accesses.               */
94 
95 #ifdef AO_ASM_X64_AVAILABLE
96 
97 AO_INLINE void
AO_nop_full(void)98 AO_nop_full(void)
99 {
100   /* Note: "mfence" (SSE2) is supported on all x86_64/amd64 chips.      */
101   __asm { mfence }
102 }
103 
104 #define AO_HAVE_nop_full
105 
106 AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t * addr)107 AO_test_and_set_full(volatile AO_TS_t *addr)
108 {
109     __asm
110     {
111         mov     rax,AO_TS_SET           ;
112         mov     rbx,addr                ;
113         xchg    byte ptr [rbx],al       ;
114     }
115 }
116 
117 #define AO_HAVE_test_and_set_full
118 
119 #endif /* AO_ASM_X64_AVAILABLE */
120 
121 #ifdef AO_CMPXCHG16B_AVAILABLE
122 
123 /* AO_compare_double_and_swap_double_full needs implementation for Win64.
124  * Also see ../gcc/x86_64.h for partial old Opteron workaround.
125  */
126 
127 # if _MSC_VER >= 1500
128 
129 #pragma intrinsic (_InterlockedCompareExchange128)
130 
131 AO_INLINE int
AO_compare_double_and_swap_double_full(volatile AO_double_t * addr,AO_t old_val1,AO_t old_val2,AO_t new_val1,AO_t new_val2)132 AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
133                                        AO_t old_val1, AO_t old_val2,
134                                        AO_t new_val1, AO_t new_val2)
135 {
136    __int64 comparandResult[2];
137    comparandResult[0] = old_val1; /* low */
138    comparandResult[1] = old_val2; /* high */
139    return _InterlockedCompareExchange128((volatile __int64 *)addr,
140                 new_val2 /* high */, new_val1 /* low */, comparandResult);
141 }
142 
143 #   define AO_HAVE_compare_double_and_swap_double_full
144 
145 # elif defined(AO_ASM_X64_AVAILABLE)
146 
147  /* If there is no intrinsic _InterlockedCompareExchange128 then we
148   * need basically what's given below.
149   */
150 
151 AO_INLINE int
AO_compare_double_and_swap_double_full(volatile AO_double_t * addr,AO_t old_val1,AO_t old_val2,AO_t new_val1,AO_t new_val2)152 AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
153                                        AO_t old_val1, AO_t old_val2,
154                                        AO_t new_val1, AO_t new_val2)
155 {
156         __asm
157         {
158                 mov     rdx,QWORD PTR [old_val2]        ;
159                 mov     rax,QWORD PTR [old_val1]        ;
160                 mov     rcx,QWORD PTR [new_val2]        ;
161                 mov     rbx,QWORD PTR [new_val1]        ;
162                 lock cmpxchg16b [addr]                  ;
163                 setz    rax                             ;
164         }
165 }
166 
167 #   define AO_HAVE_compare_double_and_swap_double_full
168 
169 # endif /* _MSC_VER >= 1500 || AO_ASM_X64_AVAILABLE */
170 
171 #endif /* AO_CMPXCHG16B_AVAILABLE */
172