/*
 * Copyright 2010-2019 Branimir Karadzic. All rights reserved.
 * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
 */

#ifndef BX_CPU_H_HEADER_GUARD
#	error "Must be included from bx/cpu.h!"
#endif // BX_CPU_H_HEADER_GUARD

#if BX_COMPILER_MSVC
#	if BX_PLATFORM_WINRT
#		include <windows.h>
#	endif // BX_PLATFORM_WINRT

#	if BX_CPU_X86
#		include <emmintrin.h> // _mm_mfence
#	endif // BX_CPU_X86

extern "C" void _ReadBarrier();
#	pragma intrinsic(_ReadBarrier)

extern "C" void _WriteBarrier();
#	pragma intrinsic(_WriteBarrier)

extern "C" void _ReadWriteBarrier();
#	pragma intrinsic(_ReadWriteBarrier)

extern "C" long _InterlockedExchangeAdd(long volatile* _ptr, long _value);
#	pragma intrinsic(_InterlockedExchangeAdd)

extern "C" int64_t __cdecl _InterlockedExchangeAdd64(int64_t volatile* _ptr, int64_t _value);
//#	pragma intrinsic(_InterlockedExchangeAdd64)

extern "C" long _InterlockedCompareExchange(long volatile* _ptr, long _exchange, long _comparand);
#	pragma intrinsic(_InterlockedCompareExchange)

extern "C" int64_t _InterlockedCompareExchange64(int64_t volatile* _ptr, int64_t _exchange, int64_t _comparand);
#	pragma intrinsic(_InterlockedCompareExchange64)

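// Workaround: 32-bit VS2013 (_MSC_VER == 1800, _M_IX86) does not expose the
// _InterlockedExchangePointer intrinsic, so it is emulated here on top of
// _InterlockedExchange; other toolchains declare the real intrinsic below.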
#if (_MSC_VER == 1800) && !defined(FIXED_592562) && defined(_M_IX86) && !defined(_M_CEE_PURE)

extern "C" long _InterlockedExchange(long volatile* _ptr, long _value);
#	pragma intrinsic(_InterlockedExchange)

__forceinline static void* _InterlockedExchangePointer_impl(void* volatile* _Target, void* _Value)
{
	return (void*)_InterlockedExchange( (long volatile*)_Target, (long)_Value);
}
#define _InterlockedExchangePointer(p, v) _InterlockedExchangePointer_impl(p, v)

#else

extern "C" void* _InterlockedExchangePointer(void* volatile* _ptr, void* _value);
#	pragma intrinsic(_InterlockedExchangePointer)

#endif // _MSC_VER == 1800

#	if BX_PLATFORM_WINRT
#		define _InterlockedExchangeAdd64 InterlockedExchangeAdd64
#	endif // BX_PLATFORM_WINRT
#endif // BX_COMPILER_MSVC

namespace bx
{
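	// Compiler-only read barrier: prevents the compiler from reordering loads across this
	// point. No CPU fence instruction is emitted.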
	inline void readBarrier()
	{
#if BX_COMPILER_MSVC
		_ReadBarrier();
#else
		asm volatile("":::"memory");
#endif // BX_COMPILER
	}

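	// Compiler-only write barrier: prevents the compiler from reordering stores across this
	// point. No CPU fence instruction is emitted.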
	inline void writeBarrier()
	{
#if BX_COMPILER_MSVC
		_WriteBarrier();
#else
		asm volatile("":::"memory");
#endif // BX_COMPILER
	}

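	// Compiler-only read/write barrier: prevents the compiler from reordering any memory
	// access across this point.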
	inline void readWriteBarrier()
	{
#if BX_COMPILER_MSVC
		_ReadWriteBarrier();
#else
		asm volatile("":::"memory");
#endif // BX_COMPILER
	}

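	// Full hardware memory fence: orders both loads and stores at the CPU level
	// (MemoryBarrier on WinRT, _mm_mfence on MSVC, __sync_synchronize elsewhere).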
	inline void memoryBarrier()
	{
#if BX_PLATFORM_WINRT
		MemoryBarrier();
#elif BX_COMPILER_MSVC
		_mm_mfence();
#else
		__sync_synchronize();
#endif // BX_COMPILER
	}

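	// Atomic compare-and-swap: if *_ptr equals _old, stores _new; returns the previous value
	// of *_ptr (equal to _old on success). Illustrative use (claiming a slot):
	//   while (bx::atomicCompareAndSwap<int32_t>(&slot, 0, 1) != 0) { /* spin */ }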
	template<>
	inline int32_t atomicCompareAndSwap<int32_t>(volatile int32_t* _ptr, int32_t _old, int32_t _new)
	{
#if BX_COMPILER_MSVC
		return int32_t(_InterlockedCompareExchange( (volatile long*)(_ptr), long(_new), long(_old) ) );
#else
		return __sync_val_compare_and_swap( (volatile int32_t*)_ptr, _old, _new);
#endif // BX_COMPILER
	}

	template<>
	inline uint32_t atomicCompareAndSwap<uint32_t>(volatile uint32_t* _ptr, uint32_t _old, uint32_t _new)
	{
#if BX_COMPILER_MSVC
		return uint32_t(_InterlockedCompareExchange( (volatile long*)(_ptr), long(_new), long(_old) ) );
#else
		return __sync_val_compare_and_swap( (volatile int32_t*)_ptr, _old, _new);
#endif // BX_COMPILER
	}

	template<>
	inline int64_t atomicCompareAndSwap<int64_t>(volatile int64_t* _ptr, int64_t _old, int64_t _new)
	{
#if BX_COMPILER_MSVC
		return _InterlockedCompareExchange64(_ptr, _new, _old);
#else
		return __sync_val_compare_and_swap( (volatile int64_t*)_ptr, _old, _new);
#endif // BX_COMPILER
	}

	template<>
	inline uint64_t atomicCompareAndSwap<uint64_t>(volatile uint64_t* _ptr, uint64_t _old, uint64_t _new)
	{
#if BX_COMPILER_MSVC
		return uint64_t(_InterlockedCompareExchange64( (volatile int64_t*)(_ptr), int64_t(_new), int64_t(_old) ) );
#else
		return __sync_val_compare_and_swap( (volatile int64_t*)_ptr, _old, _new);
#endif // BX_COMPILER
	}

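	// Atomically adds _add to *_ptr and returns the value *before* the addition.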
	template<>
	inline int32_t atomicFetchAndAdd<int32_t>(volatile int32_t* _ptr, int32_t _add)
	{
#if BX_COMPILER_MSVC
		return _InterlockedExchangeAdd( (volatile long*)_ptr, _add);
#else
		return __sync_fetch_and_add(_ptr, _add);
#endif // BX_COMPILER
	}

	template<>
	inline uint32_t atomicFetchAndAdd<uint32_t>(volatile uint32_t* _ptr, uint32_t _add)
	{
		return uint32_t(atomicFetchAndAdd<int32_t>( (volatile int32_t*)_ptr, int32_t(_add) ) );
	}

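	// 64-bit variant; when targeting Windows versions before Vista (_WIN32_WINNT < 0x600),
	// the exchange-add is emulated with a compare-and-swap loop.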
	template<>
	inline int64_t atomicFetchAndAdd<int64_t>(volatile int64_t* _ptr, int64_t _add)
	{
#if BX_COMPILER_MSVC
#	if _WIN32_WINNT >= 0x600
		return _InterlockedExchangeAdd64( (volatile int64_t*)_ptr, _add);
#	else
		int64_t oldVal;
		int64_t newVal = *(int64_t volatile*)_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap<int64_t>(_ptr, oldVal, newVal + _add);

		} while (oldVal != newVal);

		return oldVal;
#	endif // _WIN32_WINNT >= 0x600
#else
		return __sync_fetch_and_add(_ptr, _add);
#endif // BX_COMPILER
	}

	template<>
	inline uint64_t atomicFetchAndAdd<uint64_t>(volatile uint64_t* _ptr, uint64_t _add)
	{
		return uint64_t(atomicFetchAndAdd<int64_t>( (volatile int64_t*)_ptr, int64_t(_add) ) );
	}

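	// Atomically adds _add to *_ptr and returns the value *after* the addition.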
	template<>
	inline int32_t atomicAddAndFetch<int32_t>(volatile int32_t* _ptr, int32_t _add)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, _add) + _add;
#else
		return __sync_add_and_fetch(_ptr, _add);
#endif // BX_COMPILER
	}

	template<>
	inline int64_t atomicAddAndFetch<int64_t>(volatile int64_t* _ptr, int64_t _add)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, _add) + _add;
#else
		return __sync_add_and_fetch(_ptr, _add);
#endif // BX_COMPILER
	}

	template<>
	inline uint32_t atomicAddAndFetch<uint32_t>(volatile uint32_t* _ptr, uint32_t _add)
	{
		return uint32_t(atomicAddAndFetch<int32_t>( (volatile int32_t*)_ptr, int32_t(_add) ) );
	}

	template<>
	inline uint64_t atomicAddAndFetch<uint64_t>(volatile uint64_t* _ptr, uint64_t _add)
	{
		return uint64_t(atomicAddAndFetch<int64_t>( (volatile int64_t*)_ptr, int64_t(_add) ) );
	}

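	// Atomically subtracts _sub from *_ptr and returns the value *before* the subtraction.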
	template<>
	inline int32_t atomicFetchAndSub<int32_t>(volatile int32_t* _ptr, int32_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub);
#else
		return __sync_fetch_and_sub(_ptr, _sub);
#endif // BX_COMPILER
	}

	template<>
	inline int64_t atomicFetchAndSub<int64_t>(volatile int64_t* _ptr, int64_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub);
#else
		return __sync_fetch_and_sub(_ptr, _sub);
#endif // BX_COMPILER
	}

	template<>
	inline uint32_t atomicFetchAndSub<uint32_t>(volatile uint32_t* _ptr, uint32_t _sub)
	{
		return uint32_t(atomicFetchAndSub<int32_t>( (volatile int32_t*)_ptr, int32_t(_sub) ) );
	}

	template<>
	inline uint64_t atomicFetchAndSub<uint64_t>(volatile uint64_t* _ptr, uint64_t _sub)
	{
		return uint64_t(atomicFetchAndSub<int64_t>( (volatile int64_t*)_ptr, int64_t(_sub) ) );
	}

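	// Atomically subtracts _sub from *_ptr and returns the value *after* the subtraction.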
	template<>
	inline int32_t atomicSubAndFetch<int32_t>(volatile int32_t* _ptr, int32_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub) - _sub;
#else
		return __sync_sub_and_fetch(_ptr, _sub);
#endif // BX_COMPILER
	}

	template<>
	inline int64_t atomicSubAndFetch<int64_t>(volatile int64_t* _ptr, int64_t _sub)
	{
#if BX_COMPILER_MSVC
		return atomicFetchAndAdd(_ptr, -_sub) - _sub;
#else
		return __sync_sub_and_fetch(_ptr, _sub);
#endif // BX_COMPILER
	}

	template<>
	inline uint32_t atomicSubAndFetch<uint32_t>(volatile uint32_t* _ptr, uint32_t _sub)
	{
		return uint32_t(atomicSubAndFetch<int32_t>( (volatile int32_t*)_ptr, int32_t(_sub) ) );
	}

	template<>
	inline uint64_t atomicSubAndFetch<uint64_t>(volatile uint64_t* _ptr, uint64_t _sub)
	{
		return uint64_t(atomicSubAndFetch<int64_t>( (volatile int64_t*)_ptr, int64_t(_sub) ) );
	}

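	// Compare-and-swap loops that stop modifying *_ptr once it reaches _test:
	// atomicFetchTestAndAdd adds _value unless the current value is already >= _test
	// (in which case _test is stored); atomicFetchTestAndSub subtracts _value unless the
	// current value is already <= _test. Both return the previous value.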
	template<typename Ty>
	inline Ty atomicFetchTestAndAdd(volatile Ty* _ptr, Ty _test, Ty _value)
	{
		Ty oldVal;
		Ty newVal = *_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap<Ty>(_ptr, oldVal, newVal >= _test ? _test : newVal+_value);

		} while (oldVal != newVal);

		return oldVal;
	}

	template<typename Ty>
	inline Ty atomicFetchTestAndSub(volatile Ty* _ptr, Ty _test, Ty _value)
	{
		Ty oldVal;
		Ty newVal = *_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap<Ty>(_ptr, oldVal, newVal <= _test ? _test : newVal-_value);

		} while (oldVal != newVal);

		return oldVal;
	}

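	// Saturating variants: atomicFetchAndAddsat adds _value but never lets *_ptr exceed _max,
	// atomicFetchAndSubsat subtracts _value but never lets *_ptr drop below _min. Both return
	// the previous value.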
	template<typename Ty>
	Ty atomicFetchAndAddsat(volatile Ty* _ptr, Ty _value, Ty _max)
	{
		Ty oldVal;
		Ty newVal = *_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap<Ty>(_ptr, oldVal, newVal >= _max ? _max : min(_max, newVal+_value) );

		} while (oldVal != newVal && oldVal != _max);

		return oldVal;
	}

	template<typename Ty>
	Ty atomicFetchAndSubsat(volatile Ty* _ptr, Ty _value, Ty _min)
	{
		Ty oldVal;
		Ty newVal = *_ptr;
		do
		{
			oldVal = newVal;
			newVal = atomicCompareAndSwap<Ty>(_ptr, oldVal, newVal <= _min ? _min : max(_min, newVal-_value) );

		} while (oldVal != newVal && oldVal != _min);

		return oldVal;
	}

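	// Atomically replaces the pointer at _ptr with _new and returns the previous pointer.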
	inline void* atomicExchangePtr(void** _ptr, void* _new)
	{
#if BX_COMPILER_MSVC
		return _InterlockedExchangePointer(_ptr, _new);
#else
		return __sync_lock_test_and_set(_ptr, _new);
#endif // BX_COMPILER
	}

} // namespace bx