xref: /freebsd/sys/contrib/ck/include/gcc/x86/ck_pr.h (revision b0b1dbdd)
1 /*
2  * Copyright 2009-2015 Samy Al Bahra.
3  * Copyright 2011 Devon H. O'Dell <devon.odell@gmail.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #ifndef CK_PR_X86_H
29 #define CK_PR_X86_H
30 
31 #ifndef CK_PR_H
32 #error Do not include this file directly, use ck_pr.h
33 #endif
34 
35 #include <ck_cc.h>
36 #include <ck_md.h>
37 #include <ck_stdint.h>
38 
39 /*
40  * The following represent supported atomic operations.
41  * These operations may be emulated.
42  */
43 #include "ck_f_pr.h"
44 
45 /* Minimum requirements for the CK_PR interface are met. */
46 #define CK_F_PR
47 
/*
 * CK_PR_LOCK_PREFIX is pasted in front of the read-modify-write
 * instructions generated below. It expands to nothing when CK_MD_UMP is
 * defined and to the x86 "lock " prefix otherwise.
 *
 * NOTE(review): CK_MD_UMP presumably selects a uniprocessor build; it is
 * defined outside this file -- confirm.
 */
#if defined(CK_MD_UMP)
#define CK_PR_LOCK_PREFIX
#else
#define CK_PR_LOCK_PREFIX "lock "
#endif /* CK_MD_UMP */
53 
54 /*
55  * Prevent speculative execution in busy-wait loops (P4 <=)
56  * or "predefined delay".
57  */
58 CK_CC_INLINE static void
59 ck_pr_stall(void)
60 {
61 	__asm__ __volatile__("pause" ::: "memory");
62 	return;
63 }
64 
65 #define CK_PR_FENCE(T, I)				\
66 	CK_CC_INLINE static void			\
67 	ck_pr_fence_strict_##T(void)			\
68 	{						\
69 		__asm__ __volatile__(I ::: "memory");	\
70 	}
71 
72 CK_PR_FENCE(atomic, "sfence")
73 CK_PR_FENCE(atomic_store, "sfence")
74 CK_PR_FENCE(atomic_load, "mfence")
75 CK_PR_FENCE(store_atomic, "sfence")
76 CK_PR_FENCE(load_atomic, "mfence")
77 CK_PR_FENCE(load, "lfence")
78 CK_PR_FENCE(load_store, "mfence")
79 CK_PR_FENCE(store, "sfence")
80 CK_PR_FENCE(store_load, "mfence")
81 CK_PR_FENCE(memory, "mfence")
82 CK_PR_FENCE(release, "mfence")
83 CK_PR_FENCE(acquire, "mfence")
84 CK_PR_FENCE(acqrel, "mfence")
85 CK_PR_FENCE(lock, "mfence")
86 CK_PR_FENCE(unlock, "mfence")
87 
88 #undef CK_PR_FENCE
89 
90 /*
91  * Atomic fetch-and-store operations.
92  */
93 #define CK_PR_FAS(S, M, T, C, I)				\
94 	CK_CC_INLINE static T					\
95 	ck_pr_fas_##S(M *target, T v)				\
96 	{							\
97 		__asm__ __volatile__(I " %0, %1"		\
98 					: "+m" (*(C *)target),	\
99 					  "+q" (v)		\
100 					:			\
101 					: "memory");		\
102 		return v;					\
103 	}
104 
105 CK_PR_FAS(ptr, void, void *, char, "xchgl")
106 
107 #define CK_PR_FAS_S(S, T, I) CK_PR_FAS(S, T, T, T, I)
108 
109 CK_PR_FAS_S(char, char, "xchgb")
110 CK_PR_FAS_S(uint, unsigned int, "xchgl")
111 CK_PR_FAS_S(int, int, "xchgl")
112 CK_PR_FAS_S(32, uint32_t, "xchgl")
113 CK_PR_FAS_S(16, uint16_t, "xchgw")
114 CK_PR_FAS_S(8,  uint8_t,  "xchgb")
115 
116 #undef CK_PR_FAS_S
117 #undef CK_PR_FAS
118 
119 #define CK_PR_LOAD(S, M, T, C, I)					\
120 	CK_CC_INLINE static T						\
121 	ck_pr_md_load_##S(const M *target)				\
122 	{								\
123 		T r;							\
124 		__asm__ __volatile__(I " %1, %0"			\
125 					: "=q" (r)			\
126 					: "m"  (*(const C *)target)	\
127 					: "memory");			\
128 		return (r);						\
129 	}
130 
131 CK_PR_LOAD(ptr, void, void *, char, "movl")
132 
133 #define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I)
134 
135 CK_PR_LOAD_S(char, char, "movb")
136 CK_PR_LOAD_S(uint, unsigned int, "movl")
137 CK_PR_LOAD_S(int, int, "movl")
138 CK_PR_LOAD_S(32, uint32_t, "movl")
139 CK_PR_LOAD_S(16, uint16_t, "movw")
140 CK_PR_LOAD_S(8,  uint8_t,  "movb")
141 
142 #undef CK_PR_LOAD_S
143 #undef CK_PR_LOAD
144 
145 #define CK_PR_STORE(S, M, T, C, I)				\
146 	CK_CC_INLINE static void				\
147 	ck_pr_md_store_##S(M *target, T v)			\
148 	{							\
149 		__asm__ __volatile__(I " %1, %0"		\
150 					: "=m" (*(C *)target)	\
151 					: CK_CC_IMM "q" (v)	\
152 					: "memory");		\
153 		return;						\
154 	}
155 
156 CK_PR_STORE(ptr, void, const void *, char, "movl")
157 
158 #define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, T, I)
159 
160 CK_PR_STORE_S(char, char, "movb")
161 CK_PR_STORE_S(uint, unsigned int, "movl")
162 CK_PR_STORE_S(int, int, "movl")
163 CK_PR_STORE_S(32, uint32_t, "movl")
164 CK_PR_STORE_S(16, uint16_t, "movw")
165 CK_PR_STORE_S(8,  uint8_t, "movb")
166 
167 #undef CK_PR_STORE_S
168 #undef CK_PR_STORE
169 
170 /*
171  * Atomic fetch-and-add operations.
172  */
173 #define CK_PR_FAA(S, M, T, C, I)					\
174 	CK_CC_INLINE static T						\
175 	ck_pr_faa_##S(M *target, T d)					\
176 	{								\
177 		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0"	\
178 					: "+m" (*(C *)target),		\
179 					  "+q" (d)			\
180 					:				\
181 					: "memory", "cc");		\
182 		return (d);						\
183 	}
184 
185 CK_PR_FAA(ptr, void, uintptr_t, char, "xaddl")
186 
187 #define CK_PR_FAA_S(S, T, I) CK_PR_FAA(S, T, T, T, I)
188 
189 CK_PR_FAA_S(char, char, "xaddb")
190 CK_PR_FAA_S(uint, unsigned int, "xaddl")
191 CK_PR_FAA_S(int, int, "xaddl")
192 CK_PR_FAA_S(32, uint32_t, "xaddl")
193 CK_PR_FAA_S(16, uint16_t, "xaddw")
194 CK_PR_FAA_S(8,  uint8_t,  "xaddb")
195 
196 #undef CK_PR_FAA_S
197 #undef CK_PR_FAA
198 
199 /*
200  * Atomic store-only unary operations.
201  */
202 #define CK_PR_UNARY(K, S, T, C, I)				\
203 	CK_PR_UNARY_R(K, S, T, C, I)				\
204 	CK_PR_UNARY_V(K, S, T, C, I)
205 
206 #define CK_PR_UNARY_R(K, S, T, C, I)				\
207 	CK_CC_INLINE static void				\
208 	ck_pr_##K##_##S(T *target)				\
209 	{							\
210 		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0"	\
211 					: "+m" (*(C *)target)	\
212 					:			\
213 					: "memory", "cc");	\
214 		return;						\
215 	}
216 
217 #define CK_PR_UNARY_V(K, S, T, C, I)					\
218 	CK_CC_INLINE static void					\
219 	ck_pr_##K##_##S##_zero(T *target, bool *r)			\
220 	{								\
221 		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1"	\
222 					: "+m" (*(C *)target),		\
223 					  "=m" (*r)			\
224 					:				\
225 					: "memory", "cc");		\
226 		return;							\
227 	}
228 
229 
230 #define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)
231 
232 #define CK_PR_GENERATE(K)				\
233 	CK_PR_UNARY(K, ptr, void, char, #K "l") 	\
234 	CK_PR_UNARY_S(K, char, char, #K "b")		\
235 	CK_PR_UNARY_S(K, int, int, #K "l")		\
236 	CK_PR_UNARY_S(K, uint, unsigned int, #K "l")	\
237 	CK_PR_UNARY_S(K, 32, uint32_t, #K "l")		\
238 	CK_PR_UNARY_S(K, 16, uint16_t, #K "w")		\
239 	CK_PR_UNARY_S(K, 8, uint8_t, #K "b")
240 
241 CK_PR_GENERATE(inc)
242 CK_PR_GENERATE(dec)
243 CK_PR_GENERATE(neg)
244 
245 /* not does not affect condition flags. */
246 #undef CK_PR_UNARY_V
247 #define CK_PR_UNARY_V(a, b, c, d, e)
248 CK_PR_GENERATE(not)
249 
250 #undef CK_PR_GENERATE
251 #undef CK_PR_UNARY_S
252 #undef CK_PR_UNARY_V
253 #undef CK_PR_UNARY_R
254 #undef CK_PR_UNARY
255 
256 /*
257  * Atomic store-only binary operations.
258  */
259 #define CK_PR_BINARY(K, S, M, T, C, I)					\
260 	CK_CC_INLINE static void					\
261 	ck_pr_##K##_##S(M *target, T d)					\
262 	{								\
263 		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0"	\
264 					: "+m" (*(C *)target)		\
265 					: CK_CC_IMM "q" (d)		\
266 					: "memory", "cc");		\
267 		return;							\
268 	}
269 
270 #define CK_PR_BINARY_S(K, S, T, I) CK_PR_BINARY(K, S, T, T, T, I)
271 
272 #define CK_PR_GENERATE(K)					\
273 	CK_PR_BINARY(K, ptr, void, uintptr_t, char, #K "l")	\
274 	CK_PR_BINARY_S(K, char, char, #K "b")			\
275 	CK_PR_BINARY_S(K, int, int, #K "l")			\
276 	CK_PR_BINARY_S(K, uint, unsigned int, #K "l")		\
277 	CK_PR_BINARY_S(K, 32, uint32_t, #K "l")			\
278 	CK_PR_BINARY_S(K, 16, uint16_t, #K "w")			\
279 	CK_PR_BINARY_S(K, 8, uint8_t, #K "b")
280 
281 CK_PR_GENERATE(add)
282 CK_PR_GENERATE(sub)
283 CK_PR_GENERATE(and)
284 CK_PR_GENERATE(or)
285 CK_PR_GENERATE(xor)
286 
287 #undef CK_PR_GENERATE
288 #undef CK_PR_BINARY_S
289 #undef CK_PR_BINARY
290 
291 /*
292  * Atomic compare and swap.
293  */
294 #define CK_PR_CAS(S, M, T, C, I)						\
295 	CK_CC_INLINE static bool						\
296 	ck_pr_cas_##S(M *target, T compare, T set)				\
297 	{									\
298 		bool z;								\
299 		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %2, %0; setz %1"	\
300 					: "+m"  (*(C *)target),			\
301 					  "=a"  (z)				\
302 					: "q"   (set),				\
303 					  "a"   (compare)			\
304 					: "memory", "cc");			\
305 		return z;							\
306 	}
307 
308 CK_PR_CAS(ptr, void, void *, char, "cmpxchgl")
309 
310 #define CK_PR_CAS_S(S, T, I) CK_PR_CAS(S, T, T, T, I)
311 
312 CK_PR_CAS_S(char, char, "cmpxchgb")
313 CK_PR_CAS_S(int, int, "cmpxchgl")
314 CK_PR_CAS_S(uint, unsigned int, "cmpxchgl")
315 CK_PR_CAS_S(32, uint32_t, "cmpxchgl")
316 CK_PR_CAS_S(16, uint16_t, "cmpxchgw")
317 CK_PR_CAS_S(8,  uint8_t,  "cmpxchgb")
318 
319 #undef CK_PR_CAS_S
320 #undef CK_PR_CAS
321 
322 /*
323  * Compare and swap, set *v to old value of target.
324  */
325 #define CK_PR_CAS_O(S, M, T, C, I, R)						\
326 	CK_CC_INLINE static bool						\
327 	ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)		\
328 	{									\
329 		bool z;								\
330 		__asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;"	\
331 				     "mov %% " R ", %2;"			\
332 				     "setz %1;"					\
333 					: "+m"  (*(C *)target),			\
334 					  "=a"  (z),				\
335 					  "=m"  (*(C *)v)			\
336 					: "q"   (set),				\
337 					  "a"   (compare)			\
338 					: "memory", "cc");			\
339 		return (bool)z;							\
340 	}
341 
342 CK_PR_CAS_O(ptr, void, void *, char, "l", "eax")
343 
344 #define CK_PR_CAS_O_S(S, T, I, R)	\
345 	CK_PR_CAS_O(S, T, T, T, I, R)
346 
347 CK_PR_CAS_O_S(char, char, "b", "al")
348 CK_PR_CAS_O_S(int, int, "l", "eax")
349 CK_PR_CAS_O_S(uint, unsigned int, "l", "eax")
350 CK_PR_CAS_O_S(32, uint32_t, "l", "eax")
351 CK_PR_CAS_O_S(16, uint16_t, "w", "ax")
352 CK_PR_CAS_O_S(8,  uint8_t,  "b", "al")
353 
354 #undef CK_PR_CAS_O_S
355 #undef CK_PR_CAS_O
356 
357 /*
358  * Atomic bit test operations.
359  */
360 #define CK_PR_BT(K, S, T, P, C, I)					\
361 	CK_CC_INLINE static bool					\
362 	ck_pr_##K##_##S(T *target, unsigned int b)			\
363 	{								\
364 		bool c;							\
365 		__asm__ __volatile__(CK_PR_LOCK_PREFIX I "; setc %1"	\
366 					: "+m" (*(C *)target),		\
367 					  "=q" (c)			\
368 					: "q"  ((P)b)			\
369 					: "memory", "cc");		\
370 		return (bool)c;						\
371 	}
372 
373 #define CK_PR_BT_S(K, S, T, I) CK_PR_BT(K, S, T, T, T, I)
374 
375 #define CK_PR_GENERATE(K)					\
376 	CK_PR_BT(K, ptr, void, uint32_t, char, #K "l %2, %0")	\
377 	CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0")	\
378 	CK_PR_BT_S(K, int, int, #K "l %2, %0")			\
379 	CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0")		\
380 	CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0")
381 
382 CK_PR_GENERATE(btc)
383 CK_PR_GENERATE(bts)
384 CK_PR_GENERATE(btr)
385 
386 #undef CK_PR_GENERATE
387 #undef CK_PR_BT
388 
389 #endif /* CK_PR_X86_H */
390 
391