/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2011-2020. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

/*
 * Description: Native double word atomics for x86/x86_64
 * Author: Rickard Green
 */

#ifndef ETHR_X86_DW_ATOMIC_H__
#define ETHR_X86_DW_ATOMIC_H__

#ifdef ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT

#define ETHR_HAVE_NATIVE_DW_ATOMIC
#define ETHR_NATIVE_DW_ATOMIC_IMPL "ethread"

/*
 * If ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ is defined, it will be used
 * at runtime in order to determine whether the native or the fallback
 * implementation should be used.
 */
#define ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ \
  ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__
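
/*
 * Illustration of the runtime check (a minimal sketch, guarded out):
 * a dispatching wrapper can test the macro once per operation and
 * call either the native implementation defined below or a fallback.
 * The fallback function name here is hypothetical and not part of
 * this header.
 */
#if 0
static int
example_dw_cmpxchg_dispatch(ethr_native_dw_atomic_t *var,
                            ethr_sint_t *new_value,
                            ethr_sint_t *xchg)
{
    if (ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__)
        return ethr_native_dw_atomic_cmpxchg_mb(var, new_value, xchg);
    /* hypothetical non-native fallback */
    return example_fallback_dw_cmpxchg_mb(var, new_value, xchg);
}
#endif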

#if ETHR_SIZEOF_PTR == 4
typedef volatile ethr_sint64_t * ethr_native_dw_ptr_t;
#  define ETHR_DW_NATMC_ALIGN_MASK__ 0x7
#  define ETHR_DW_CMPXCHG_SFX__ "8b"
#  define ETHR_NATIVE_SU_DW_SINT_T ethr_sint64_t
#else
#ifdef ETHR_HAVE_INT128_T
#  define ETHR_NATIVE_SU_DW_SINT_T ethr_sint128_t
typedef volatile ethr_sint128_t * ethr_native_dw_ptr_t;
#else
typedef struct {
    ethr_sint64_t sint64[2];
} ethr_native_sint128_t__;
typedef volatile ethr_native_sint128_t__ * ethr_native_dw_ptr_t;
#endif
#  define ETHR_DW_NATMC_ALIGN_MASK__ 0xf
#  define ETHR_DW_CMPXCHG_SFX__ "16b"
#endif

/*
 * We need 16 byte aligned memory in 64-bit mode, and 8 byte aligned
 * memory in 32-bit mode. 16 byte aligned malloc in 64-bit mode is
 * not common, and at least some glibc malloc implementations align
 * to only 4 bytes in 32-bit mode.
 *
 * This code assumes 8 byte aligned memory in 64-bit mode, and 4 byte
 * aligned memory in 32-bit mode. A malloc implementation that does
 * not adhere to these alignment requirements is seriously broken,
 * and we won't bother trying to work around it.
 *
 * Since memory alignment may be off by one word we need to align at
 * runtime. We, therefore, need an extra word allocated.
 */
#define ETHR_DW_NATMC_MEM__(VAR) \
   (&(VAR)->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
typedef union {
#ifdef ETHR_NATIVE_SU_DW_SINT_T
    volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint;
#endif
    volatile ethr_sint_t sint[3];
    volatile char c[ETHR_SIZEOF_PTR*3];
} ethr_native_dw_atomic_t;
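
/*
 * Worked example of the runtime alignment fix-up above (an
 * illustrative sketch, guarded out). With a word aligned allocation,
 * the address A of c[0] satisfies A & ETHR_DW_NATMC_ALIGN_MASK__ ==
 * 0 or ETHR_SIZEOF_PTR, so indexing c[] by that residue yields an
 * address A + (A & mask) that is double word aligned in both cases,
 * and the extra word in c[] guarantees that the aligned double word
 * still fits inside the union.
 */
#if 0
#include <assert.h>
static void
example_check_dw_alignment(ethr_native_dw_atomic_t *var)
{
    ethr_uint_t mem = (ethr_uint_t) ETHR_DW_NATMC_MEM__(var);
    /* The fixed-up pointer is properly aligned... */
    assert((mem & ETHR_DW_NATMC_ALIGN_MASK__) == 0);
    /* ...and the 2*ETHR_SIZEOF_PTR byte double word fits in c[]. */
    assert(mem + 2*ETHR_SIZEOF_PTR
           <= (ethr_uint_t) &var->c[0] + sizeof(var->c));
}
#endif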


#if (defined(ETHR_TRY_INLINE_FUNCS) \
     || defined(ETHR_ATOMIC_IMPL__) \
     || defined(ETHR_X86_SSE2_ASM_C__)) \
    && ETHR_SIZEOF_PTR == 4 \
    && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
ethr_sint64_t
ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var);
void
ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
                                  ethr_sint64_t val);
#endif

#if (defined(ETHR_TRY_INLINE_FUNCS) \
     || defined(ETHR_ATOMIC_IMPL__) \
     || defined(ETHR_X86_SSE2_ASM_C__))
#  ifdef ETHR_DEBUG
#    define ETHR_DW_DBG_ALIGNED__(PTR) \
       ETHR_ASSERT((((ethr_uint_t) (PTR)) & ETHR_DW_NATMC_ALIGN_MASK__) == 0);
#  else
#    define ETHR_DW_DBG_ALIGNED__(PTR)
#  endif
#endif

#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)

#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR
static ETHR_INLINE ethr_sint_t *
ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var)
{
    return (ethr_sint_t *) ETHR_DW_NATMC_MEM__(var);
}

#if defined(ETHR_CMPXCHG8B_PIC_NO_CLOBBER_EBX) && defined(__PIC__) && __PIC__
#if ETHR_SIZEOF_PTR != 4
#  error unexpected pic issue
#endif
/*
 * When position independent code is used in 32-bit mode, the EBX register
 * is used to store the global offset table address. When compiling with
 * an old gcc (< vsn 5) we may not use it as input or output in an inline
 * asm. We then need to save and restore the EBX register explicitly (for
 * some reason old gcc compilers didn't provide this service to us).
 * ETHR_CMPXCHG8B_PIC_NO_CLOBBER_EBX will be defined if we need to
 * explicitly manage EBX ourselves.
 */
#  define ETHR_NO_CLOBBER_EBX__ 1
#else
#  define ETHR_NO_CLOBBER_EBX__ 0
#endif

#if ETHR_NO_CLOBBER_EBX__ && !defined(ETHR_CMPXCHG8B_REGISTER_SHORTAGE)
/* When no optimization is on, we'll run into a register shortage */
#  if defined(ETHR_DEBUG) || defined(DEBUG) || defined(VALGRIND) \
      || defined(GCOV)
#    define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 1
#  else
#    define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 0
#  endif
#endif


#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_CMPXCHG_MB

/*
 * Note: on a failed comparison, cmpxchg8b/cmpxchg16b loads the value
 * observed in memory into edx:eax (rdx:rax), i.e., into xchg[], so a
 * caller can recompute and retry against the current contents.
 */
static ETHR_INLINE int
ethr_native_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var,
                                 ethr_sint_t *new_value,
                                 ethr_sint_t *xchg)
{
    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
    char xchgd;

    ETHR_DW_DBG_ALIGNED__(p);

#if ETHR_NO_CLOBBER_EBX__ && ETHR_CMPXCHG8B_REGISTER_SHORTAGE
    /*
     * gcc won't let us use ebx as input and we
     * get a register shortage
     */

    __asm__ __volatile__(
        "pushl %%ebx\n\t"
        "movl (%7), %%ebx\n\t"
        "movl 4(%7), %%ecx\n\t"
        "lock; cmpxchg8b %0\n\t"
        "setz %3\n\t"
        "popl %%ebx\n\t"
        : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd)
        : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "r"(new_value)
        : "cc", "memory");

#elif ETHR_NO_CLOBBER_EBX__
    /*
     * gcc won't let us use ebx as input
     */

    __asm__ __volatile__(
        "pushl %%ebx\n\t"
        "movl %8, %%ebx\n\t"
        "lock; cmpxchg8b %0\n\t"
        "setz %3\n\t"
        "popl %%ebx\n\t"
        : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=q"(xchgd)
        : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "c"(new_value[1]), "r"(new_value[0])
        : "cc", "memory");

#else
    /*
     * gcc lets us place values in the registers where
     * we want them
     */

    __asm__ __volatile__(
        "lock; cmpxchg" ETHR_DW_CMPXCHG_SFX__ " %0\n\t"
        "setz %3\n\t"
        : "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=q"(xchgd)
        : "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "c"(new_value[1]), "b"(new_value[0])
        : "cc", "memory");

#endif

    return (int) xchgd;
}

#undef ETHR_NO_CLOBBER_EBX__
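
/*
 * Illustrative use of ethr_native_dw_atomic_cmpxchg_mb() (a sketch,
 * guarded out; the helper name is hypothetical): since a failing
 * cmpxchg loads the observed memory value into xchg[], a double word
 * read-modify-write is a retry loop that recomputes the new value
 * from the latest observation.
 */
#if 0
static void
example_dw_atomic_inc(ethr_native_dw_atomic_t *var)
{
    ethr_sint_t xchg[2] = {0, 0}; /* guess; updated on each failure */
    ethr_sint_t new_value[2];
    do {
        /* Increment the low word; carry into the high word on wrap. */
        new_value[0] = (ethr_sint_t) ((ethr_uint_t) xchg[0] + 1);
        new_value[1] = xchg[1] + (new_value[0] == 0);
    } while (!ethr_native_dw_atomic_cmpxchg_mb(var, new_value, xchg));
}
#endif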

#if ETHR_SIZEOF_PTR == 4 && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)

typedef union {
    ethr_sint64_t sint64;
    ethr_sint_t sint[2];
} ethr_dw_atomic_no_sse2_convert_t;

#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ

static ETHR_INLINE ethr_sint64_t
ethr_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
{
    if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__)
        return ethr_sse2_native_su_dw_atomic_read(var);
    else {
        /*
         * Read via cmpxchg8b: compare against an arbitrary value with
         * new == expected. If the comparison fails, the observed value
         * ends up in xchg; if it happens to succeed, the same value is
         * written back, leaving memory unchanged. Either way, xchg
         * holds the current value afterwards.
         */
        ethr_sint_t new_value[2];
        ethr_dw_atomic_no_sse2_convert_t xchg;
        new_value[0] = new_value[1] = xchg.sint[0] = xchg.sint[1] = 0x83838383;
        (void) ethr_native_dw_atomic_cmpxchg_mb(var, new_value, xchg.sint);
        return xchg.sint64;
    }
}

#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET

static ETHR_INLINE void
ethr_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
                             ethr_sint64_t val)
{
    if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__)
        ethr_sse2_native_su_dw_atomic_set(var, val);
    else {
        /*
         * Store via cmpxchg8b: retry until the cmpxchg succeeds. Each
         * failure updates xchg with the observed value, so the next
         * iteration compares against the current contents.
         */
        ethr_sint_t xchg[2] = {0, 0};
        ethr_dw_atomic_no_sse2_convert_t new_value;
        new_value.sint64 = val;
        while (!ethr_native_dw_atomic_cmpxchg_mb(var, new_value.sint, xchg));
    }
}

#endif /* ETHR_SIZEOF_PTR == 4 */

#endif /* ETHR_TRY_INLINE_FUNCS */

#if defined(ETHR_X86_SSE2_ASM_C__) \
    && ETHR_SIZEOF_PTR == 4 \
    && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)

/*
 * 8-byte aligned loads and stores of 64-bit values are atomic from
 * pentium and forward. An ordinary volatile load or store in 32-bit
 * mode generates two 32-bit operations (at least with gcc-4.1.2 using
 * -msse2). In order to guarantee one 64-bit load/store operation
 * from/to memory we load/store via an xmm register using movq.
 *
 * Load/store can be achieved using cmpxchg8b; however, using movq is
 * much faster. Unfortunately we cannot do the same thing in 64-bit
 * mode; instead, we have to do loads and stores via cmpxchg16b.
 *
 * We do not inline these functions, but instead compile them into a
 * separate object file using -msse2, since we do not want to use
 * -msse2 for the whole system. If we detect sse2 support (pentium4
 * and forward) at runtime, we use them; otherwise, we fall back to
 * using cmpxchg8b for loads and stores. This way the binary can be
 * moved between processors with and without sse2 support.
 */

ethr_sint64_t
ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
{
    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
    ethr_sint64_t val;
    ETHR_DW_DBG_ALIGNED__(p);
    __asm__ __volatile__("movq %1, %0\n\t" : "=x"(val) : "m"(*p) : "memory");
    return val;
}

void
ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
                                  ethr_sint64_t val)
{
    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
    ETHR_DW_DBG_ALIGNED__(p);
    __asm__ __volatile__("movq %1, %0\n\t" : "=m"(*p) : "x"(val) : "memory");
}

#endif /* ETHR_X86_SSE2_ASM_C__ */

#endif /* ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT */

#endif /* ETHR_X86_DW_ATOMIC_H__ */