/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2011-2020. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

/*
 * Description: Native double word atomics for x86/x86_64
 * Author: Rickard Green
 */

#ifndef ETHR_X86_DW_ATOMIC_H__
#define ETHR_X86_DW_ATOMIC_H__

#ifdef ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT

#define ETHR_HAVE_NATIVE_DW_ATOMIC
#define ETHR_NATIVE_DW_ATOMIC_IMPL "ethread"

/*
 * If ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ is defined, it will be
 * evaluated at runtime in order to determine whether the native or the
 * fallback implementation should be used.
 */
#define ETHR_RTCHK_USE_NATIVE_DW_ATOMIC_IMPL__ \
  ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__
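
/*
 * The runtime check itself (ETHR_X86_RUNTIME_CONF_HAVE_DW_CMPXCHG__) is
 * implemented elsewhere in the ethread sources. As a rough illustration
 * only (a sketch, not the ethread implementation), such a check is
 * typically based on CPUID leaf 1, where EDX bit 8 reports CMPXCHG8B
 * support and ECX bit 13 reports CMPXCHG16B support:
 */
#if 0 /* illustrative sketch; not compiled */
#include <cpuid.h> /* gcc/clang __get_cpuid() helper */

static int
example_have_dw_cmpxchg(void)
{
    unsigned int eax, ebx, ecx, edx;
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
	return 0;
#if ETHR_SIZEOF_PTR == 4
    return (edx & (1 << 8)) != 0;	/* CX8: cmpxchg8b */
#else
    return (ecx & (1 << 13)) != 0;	/* CX16: cmpxchg16b */
#endif
}
#endif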

#if ETHR_SIZEOF_PTR == 4
typedef volatile ethr_sint64_t * ethr_native_dw_ptr_t;
#  define ETHR_DW_NATMC_ALIGN_MASK__ 0x7
#  define ETHR_DW_CMPXCHG_SFX__ "8b"
#  define ETHR_NATIVE_SU_DW_SINT_T ethr_sint64_t
#else
#ifdef ETHR_HAVE_INT128_T
#  define ETHR_NATIVE_SU_DW_SINT_T ethr_sint128_t
typedef volatile ethr_sint128_t * ethr_native_dw_ptr_t;
#else
typedef struct {
    ethr_sint64_t sint64[2];
} ethr_native_sint128_t__;
typedef volatile ethr_native_sint128_t__ * ethr_native_dw_ptr_t;
#endif
#  define ETHR_DW_NATMC_ALIGN_MASK__ 0xf
#  define ETHR_DW_CMPXCHG_SFX__ "16b"
#endif

/*
 * We need 16 byte aligned memory in 64-bit mode, and 8 byte aligned
 * memory in 32-bit mode. 16 byte aligned malloc in 64-bit mode is
 * not common, and at least some glibc malloc implementations only
 * guarantee 4 byte alignment in 32-bit mode.
 *
 * This code assumes 8 byte aligned memory in 64-bit mode, and 4 byte
 * aligned memory in 32-bit mode. A malloc implementation that does
 * not adhere to these alignment requirements is seriously broken,
 * and we won't bother trying to work around it.
 *
 * Since the memory alignment may be off by one word, we need to align
 * at runtime, and therefore need one extra word allocated.
 */
#define ETHR_DW_NATMC_MEM__(VAR) \
   (&(VAR)->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
typedef union {
#ifdef ETHR_NATIVE_SU_DW_SINT_T
    volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint;
#endif
    volatile ethr_sint_t sint[3];
    volatile char c[ETHR_SIZEOF_PTR*3];
} ethr_native_dw_atomic_t;
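
/*
 * For example, in 32-bit mode a var->c[] array starting at address
 * 0x1004 gives 0x1004 & 0x7 == 4, so ETHR_DW_NATMC_MEM__() returns
 * &var->c[4] == 0x1008, which is 8 byte aligned. Since the union is
 * three words (12 bytes) large, the 8 bytes starting at that offset
 * still lie within the union. The same reasoning applies in 64-bit
 * mode with 16 byte alignment and a 24 byte union.
 */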


#if (defined(ETHR_TRY_INLINE_FUNCS) \
     || defined(ETHR_ATOMIC_IMPL__) \
     || defined(ETHR_X86_SSE2_ASM_C__)) \
    && ETHR_SIZEOF_PTR == 4 \
    && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)
ethr_sint64_t
ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var);
void
ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
				  ethr_sint64_t val);
#endif

#if (defined(ETHR_TRY_INLINE_FUNCS) \
     || defined(ETHR_ATOMIC_IMPL__) \
     || defined(ETHR_X86_SSE2_ASM_C__))
#  ifdef ETHR_DEBUG
#    define ETHR_DW_DBG_ALIGNED__(PTR) \
       ETHR_ASSERT((((ethr_uint_t) (PTR)) & ETHR_DW_NATMC_ALIGN_MASK__) == 0);
#  else
#    define ETHR_DW_DBG_ALIGNED__(PTR)
#  endif
#endif

#if defined(ETHR_TRY_INLINE_FUNCS) || defined(ETHR_ATOMIC_IMPL__)

#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_ADDR
static ETHR_INLINE ethr_sint_t *
ethr_native_dw_atomic_addr(ethr_native_dw_atomic_t *var)
{
    return (ethr_sint_t *) ETHR_DW_NATMC_MEM__(var);
}

#if defined(ETHR_CMPXCHG8B_PIC_NO_CLOBBER_EBX) && defined(__PIC__) && __PIC__
#if ETHR_SIZEOF_PTR != 4
#  error unexpected pic issue
#endif
/*
 * When position independent code is used in 32-bit mode, the EBX register
 * is used to hold the address of the global offset table. When compiling
 * with an old gcc (< vsn 5), we may not use it as input or output in an
 * inline asm. We then need to save and restore the EBX register explicitly
 * (for some reason old gcc compilers didn't provide this service to us).
 * ETHR_CMPXCHG8B_PIC_NO_CLOBBER_EBX will be defined if we need to
 * explicitly manage EBX ourselves.
 */
#  define ETHR_NO_CLOBBER_EBX__ 1
#else
#  define ETHR_NO_CLOBBER_EBX__ 0
#endif

#if ETHR_NO_CLOBBER_EBX__ && !defined(ETHR_CMPXCHG8B_REGISTER_SHORTAGE)
/* When no optimization is on, we'll run into a register shortage */
#  if defined(ETHR_DEBUG) || defined(DEBUG) || defined(VALGRIND) \
      || defined(GCOV)
#    define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 1
#  else
#    define ETHR_CMPXCHG8B_REGISTER_SHORTAGE 0
#  endif
#endif


#define ETHR_HAVE_ETHR_NATIVE_DW_ATOMIC_CMPXCHG_MB

static ETHR_INLINE int
ethr_native_dw_atomic_cmpxchg_mb(ethr_native_dw_atomic_t *var,
				 ethr_sint_t *new_value,
				 ethr_sint_t *xchg)
{
    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
    char xchgd;

    ETHR_DW_DBG_ALIGNED__(p);

#if ETHR_NO_CLOBBER_EBX__ && ETHR_CMPXCHG8B_REGISTER_SHORTAGE
    /*
     * gcc won't let us use ebx as input and we
     * run into a register shortage
     */

    __asm__ __volatile__(
	"pushl %%ebx\n\t"
	"movl (%7), %%ebx\n\t"
	"movl 4(%7), %%ecx\n\t"
	"lock; cmpxchg8b %0\n\t"
	"setz %3\n\t"
	"popl %%ebx\n\t"
	: "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=c"(xchgd)
	: "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "r"(new_value)
	: "cc", "memory");

#elif ETHR_NO_CLOBBER_EBX__
    /*
     * gcc won't let us use ebx as input
     */

    __asm__ __volatile__(
	"pushl %%ebx\n\t"
	"movl %8, %%ebx\n\t"
	"lock; cmpxchg8b %0\n\t"
	"setz %3\n\t"
	"popl %%ebx\n\t"
	: "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=q"(xchgd)
	: "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "c"(new_value[1]), "r"(new_value[0])
	: "cc", "memory");

#else
    /*
     * gcc lets us place values in the registers where
     * we want them
     */

    __asm__ __volatile__(
	"lock; cmpxchg" ETHR_DW_CMPXCHG_SFX__ " %0\n\t"
	"setz %3\n\t"
	: "=m"(*p), "=d"(xchg[1]), "=a"(xchg[0]), "=q"(xchgd)
	: "m"(*p), "1"(xchg[1]), "2"(xchg[0]), "c"(new_value[1]), "b"(new_value[0])
	: "cc", "memory");

#endif

    return (int) xchgd;
}
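
/*
 * A typical use of the double word cmpxchg is a read-modify-write loop.
 * A minimal sketch follows (illustrative only; not part of the ethread
 * API, which layers the generic ethr_dw_atomic operations on top of
 * this primitive elsewhere). It relies on the fact that a failed
 * cmpxchg loads the current contents of *var into xchg[], so the first
 * (possibly failing) attempt doubles as the initial read:
 */
#if 0 /* illustrative sketch; not compiled */
static void
example_dw_atomic_add(ethr_native_dw_atomic_t *var,
		      ethr_sint_t incr0, ethr_sint_t incr1)
{
    ethr_sint_t expected[2] = {0, 0};
    ethr_sint_t new_value[2];
    do {
	/* On failure, expected[] has been refreshed with *var's
	 * current contents, so retry with an updated new value. */
	new_value[0] = expected[0] + incr0;
	new_value[1] = expected[1] + incr1;
    } while (!ethr_native_dw_atomic_cmpxchg_mb(var, new_value, expected));
}
#endif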

#undef ETHR_NO_CLOBBER_EBX__

#if ETHR_SIZEOF_PTR == 4 && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)

typedef union {
    ethr_sint64_t sint64;
    ethr_sint_t sint[2];
} ethr_dw_atomic_no_sse2_convert_t;

#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_READ

static ETHR_INLINE ethr_sint64_t
ethr_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
{
    if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__)
	return ethr_sse2_native_su_dw_atomic_read(var);
    else {
	/*
	 * No SSE2; read via cmpxchg8b. If the compare fails, the current
	 * contents are loaded into xchg; if it happens to succeed, the
	 * same (dummy) value is written back, so *var is unchanged and
	 * xchg already holds it.
	 */
	ethr_sint_t new_value[2];
	ethr_dw_atomic_no_sse2_convert_t xchg;
	new_value[0] = new_value[1] = xchg.sint[0] = xchg.sint[1] = 0x83838383;
	(void) ethr_native_dw_atomic_cmpxchg_mb(var, new_value, xchg.sint);
	return xchg.sint64;
    }
}

#define ETHR_HAVE_ETHR_NATIVE_SU_DW_ATOMIC_SET

static ETHR_INLINE void
ethr_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
			     ethr_sint64_t val)
{
    if (ETHR_X86_RUNTIME_CONF_HAVE_SSE2__)
	ethr_sse2_native_su_dw_atomic_set(var, val);
    else {
	/*
	 * No SSE2; store via cmpxchg8b. A failed attempt refreshes xchg
	 * with the current contents of *var, so the next attempt uses an
	 * up-to-date expected value.
	 */
	ethr_sint_t xchg[2] = {0, 0};
	ethr_dw_atomic_no_sse2_convert_t new_value;
	new_value.sint64 = val;
	while (!ethr_native_dw_atomic_cmpxchg_mb(var, new_value.sint, xchg));
    }
}

#endif /* ETHR_SIZEOF_PTR == 4 */

#endif /* ETHR_TRY_INLINE_FUNCS */

#if defined(ETHR_X86_SSE2_ASM_C__) \
    && ETHR_SIZEOF_PTR == 4 \
    && defined(ETHR_GCC_HAVE_SSE2_ASM_SUPPORT)

/*
 * 8-byte aligned loads and stores of 64-bit values are atomic from
 * the Pentium and forward. An ordinary volatile load or store in 32-bit
 * mode generates two 32-bit operations (at least with gcc-4.1.2 using
 * -msse2). In order to guarantee a single 64-bit load/store operation
 * from/to memory, we load/store via an xmm register using movq.
 *
 * Load/store can also be achieved using cmpxchg8b; however, using movq
 * is much faster. Unfortunately we cannot do the same thing in 64-bit
 * mode; instead, we have to do loads and stores via cmpxchg16b.
 *
 * We do not inline these functions, but instead compile them into a
 * separate object file using -msse2, since we don't want to use -msse2
 * for the whole system. If we detect sse2 support (pentium4 and forward)
 * at runtime, we use these functions; otherwise, we fall back to using
 * cmpxchg8b for loads and stores. This way the binary can be moved
 * between processors with and without sse2 support.
 */
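
/*
 * In other words, a separate translation unit of roughly the following
 * shape defines ETHR_X86_SSE2_ASM_C__ before including the ethread
 * headers, and only that file is compiled with -msse2 (the file name
 * and build command here are hypothetical; the actual build glue lives
 * elsewhere in the ethread sources):
 *
 *   // ethr_x86_sse2_asm.c -- compiled with: cc -msse2 -c ...
 *   #define ETHR_X86_SSE2_ASM_C__
 *   #include "ethread.h"
 */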

ethr_sint64_t
ethr_sse2_native_su_dw_atomic_read(ethr_native_dw_atomic_t *var)
{
    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
    ethr_sint64_t val;
    ETHR_DW_DBG_ALIGNED__(p);
    __asm__ __volatile__("movq %1, %0\n\t" : "=x"(val) : "m"(*p) : "memory");
    return val;
}

void
ethr_sse2_native_su_dw_atomic_set(ethr_native_dw_atomic_t *var,
				  ethr_sint64_t val)
{
    ethr_native_dw_ptr_t p = (ethr_native_dw_ptr_t) ETHR_DW_NATMC_MEM__(var);
    ETHR_DW_DBG_ALIGNED__(p);
    __asm__ __volatile__("movq %1, %0\n\t" : "=m"(*p) : "x"(val) : "memory");
}

#endif /* ETHR_X86_SSE2_ASM_C__ */


#endif /* ETHR_GCC_HAVE_DW_CMPXCHG_ASM_SUPPORT */

#endif /* ETHR_X86_DW_ATOMIC_H__ */