1 #pragma once
2 
3 #include <stdbool.h>
4 #include <stddef.h>
5 #include <stdint.h>
6 
7 /* SSE-specific headers */
8 #if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
9 	#include <xmmintrin.h>
10 #endif
11 
12 /* ARM-specific headers */
13 #if defined(__ARM_ACLE)
14 	#include <arm_acle.h>
15 #endif
16 
17 /* MSVC-specific headers */
18 #ifdef _MSC_VER
19 	#include <intrin.h>
20 #endif
21 
22 
23 #if defined(__wasm__) && defined(__clang__)
24 	/*
25 	 * Clang for WebAssembly target lacks stdatomic.h header,
26 	 * even though it supports the necessary low-level intrinsics.
27 	 * Thus, we implement pthreadpool atomic functions on top of
28 	 * low-level Clang-specific interfaces for this target.
29 	 */
30 
31 	typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
32 	typedef _Atomic(size_t)   pthreadpool_atomic_size_t;
33 	typedef _Atomic(void*)    pthreadpool_atomic_void_p;
34 
pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)35 	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
36 		pthreadpool_atomic_uint32_t* address)
37 	{
38 		return __c11_atomic_load(address, __ATOMIC_RELAXED);
39 	}
40 
pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)41 	static inline size_t pthreadpool_load_relaxed_size_t(
42 		pthreadpool_atomic_size_t* address)
43 	{
44 		return __c11_atomic_load(address, __ATOMIC_RELAXED);
45 	}
46 
pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)47 	static inline void* pthreadpool_load_relaxed_void_p(
48 		pthreadpool_atomic_void_p* address)
49 	{
50 		return __c11_atomic_load(address, __ATOMIC_RELAXED);
51 	}
52 
pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)53 	static inline uint32_t pthreadpool_load_acquire_uint32_t(
54 		pthreadpool_atomic_uint32_t* address)
55 	{
56 		return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
57 	}
58 
pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)59 	static inline size_t pthreadpool_load_acquire_size_t(
60 		pthreadpool_atomic_size_t* address)
61 	{
62 		return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
63 	}
64 
pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)65 	static inline void pthreadpool_store_relaxed_uint32_t(
66 		pthreadpool_atomic_uint32_t* address,
67 		uint32_t value)
68 	{
69 		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
70 	}
71 
pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)72 	static inline void pthreadpool_store_relaxed_size_t(
73 		pthreadpool_atomic_size_t* address,
74 		size_t value)
75 	{
76 		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
77 	}
78 
pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)79 	static inline void pthreadpool_store_relaxed_void_p(
80 		pthreadpool_atomic_void_p* address,
81 		void* value)
82 	{
83 		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
84 	}
85 
pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)86 	static inline void pthreadpool_store_release_uint32_t(
87 		pthreadpool_atomic_uint32_t* address,
88 		uint32_t value)
89 	{
90 		__c11_atomic_store(address, value, __ATOMIC_RELEASE);
91 	}
92 
pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)93 	static inline void pthreadpool_store_release_size_t(
94 		pthreadpool_atomic_size_t* address,
95 		size_t value)
96 	{
97 		__c11_atomic_store(address, value, __ATOMIC_RELEASE);
98 	}
99 
pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)100 	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
101 		pthreadpool_atomic_size_t* address)
102 	{
103 		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELAXED) - 1;
104 	}
105 
pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)106 	static inline size_t pthreadpool_decrement_fetch_release_size_t(
107 		pthreadpool_atomic_size_t* address)
108 	{
109 		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELEASE) - 1;
110 	}
111 
pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)112 	static inline bool pthreadpool_try_decrement_relaxed_size_t(
113 		pthreadpool_atomic_size_t* value)
114 	{
115 		size_t actual_value = __c11_atomic_load(value, __ATOMIC_RELAXED);
116 		while (actual_value != 0) {
117 			if (__c11_atomic_compare_exchange_weak(
118 				value, &actual_value, actual_value - 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
119 			{
120 				return true;
121 			}
122 		}
123 		return false;
124 	}
125 
pthreadpool_fence_acquire()126 	static inline void pthreadpool_fence_acquire() {
127 		__c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
128 	}
129 
pthreadpool_fence_release()130 	static inline void pthreadpool_fence_release() {
131 		__c11_atomic_thread_fence(__ATOMIC_RELEASE);
132 	}
133 #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
#include <stdatomic.h>

/* Atomic storage types, spelled with the C11 _Atomic type specifier. */
typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
typedef _Atomic(size_t)   pthreadpool_atomic_size_t;
typedef _Atomic(void*)    pthreadpool_atomic_void_p;

/* Returns the 32-bit value at address, read atomically with relaxed ordering. */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return atomic_load_explicit(address, memory_order_relaxed);
}

/* Returns the size_t value at address, read atomically with relaxed ordering. */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return atomic_load_explicit(address, memory_order_relaxed);
}

/* Returns the pointer stored at address, read atomically with relaxed ordering. */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return atomic_load_explicit(address, memory_order_relaxed);
}

/* Returns the 32-bit value at address, read atomically with acquire ordering. */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return atomic_load_explicit(address, memory_order_acquire);
}

/* Returns the size_t value at address, read atomically with acquire ordering. */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	return atomic_load_explicit(address, memory_order_acquire);
}

/* Atomically writes value to address with relaxed ordering. */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	atomic_store_explicit(address, value, memory_order_relaxed);
}

/* Atomically writes value to address with relaxed ordering. */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	atomic_store_explicit(address, value, memory_order_relaxed);
}

/* Atomically writes the pointer value to address with relaxed ordering. */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	atomic_store_explicit(address, value, memory_order_relaxed);
}

/* Atomically writes value to address with release ordering. */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	atomic_store_explicit(address, value, memory_order_release);
}

/* Atomically writes value to address with release ordering. */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	atomic_store_explicit(address, value, memory_order_release);
}

/*
 * Atomically subtracts one from *address with relaxed ordering.
 * Returns the value after the decrement (fetch_sub yields the old value).
 */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return atomic_fetch_sub_explicit(address, 1, memory_order_relaxed) - 1;
}

/*
 * Atomically subtracts one from *address with release ordering.
 * Returns the value after the decrement (fetch_sub yields the old value).
 */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	return atomic_fetch_sub_explicit(address, 1, memory_order_release) - 1;
}

/*
 * Decrements *value by one unless it is zero, using relaxed ordering.
 * Returns true if the decrement happened, false if zero was observed.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	#if defined(__clang__) && (defined(__arm__) || defined(__aarch64__))
		/* Clang on ARM: hand-written exclusive load / exclusive store retry loop. */
		size_t actual_value;
		do {
			actual_value = __builtin_arm_ldrex((const volatile size_t*) value);
			if (actual_value == 0) {
				/* Nothing to decrement: clear the exclusive state before leaving. */
				__builtin_arm_clrex();
				return false;
			}
		} while (__builtin_arm_strex(actual_value - 1, (volatile size_t*) value) != 0);
		return true;
	#else
		size_t actual_value = pthreadpool_load_relaxed_size_t(value);
		while (actual_value != 0) {
			/* A failed exchange refreshes actual_value, so the loop re-tests the new contents. */
			if (atomic_compare_exchange_weak_explicit(
				value, &actual_value, actual_value - 1, memory_order_relaxed, memory_order_relaxed))
			{
				return true;
			}
		}
		return false;
	#endif
}

/* Issues an acquire thread fence. Declared with (void) so it is a full prototype. */
static inline void pthreadpool_fence_acquire(void) {
	atomic_thread_fence(memory_order_acquire);
}

/* Issues a release thread fence. Declared with (void) so it is a full prototype. */
static inline void pthreadpool_fence_release(void) {
	atomic_thread_fence(memory_order_release);
}
250 #elif defined(__GNUC__)
/* Storage types for the legacy GCC path: plain volatile objects driven by __sync builtins. */
typedef uint32_t volatile pthreadpool_atomic_uint32_t;
typedef size_t volatile   pthreadpool_atomic_size_t;
typedef void* volatile    pthreadpool_atomic_void_p;

/* Returns the 32-bit value at address via a plain volatile read. */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return *address;
}

/* Returns the size_t value at address via a plain volatile read. */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return *address;
}

/* Returns the pointer stored at address via a plain volatile read. */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return *address;
}

/*
 * Returns the 32-bit value at address with acquire ordering.
 * A volatile read alone does not order surrounding non-volatile accesses,
 * so the read is followed by the same full barrier the fences here use.
 */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	const uint32_t value = *address;
	__sync_synchronize();
	return value;
}

/*
 * Returns the size_t value at address with acquire ordering.
 * A volatile read alone does not order surrounding non-volatile accesses,
 * so the read is followed by the same full barrier the fences here use.
 */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	const size_t value = *address;
	__sync_synchronize();
	return value;
}

/* Writes value to address via a plain volatile store. */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	*address = value;
}

/* Writes value to address via a plain volatile store. */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	*address = value;
}

/* Writes the pointer value to address via a plain volatile store. */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	*address = value;
}

/*
 * Writes value to address with release ordering.
 * The full barrier before the store keeps earlier accesses from moving past it.
 */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	__sync_synchronize();
	*address = value;
}

/*
 * Writes value to address with release ordering.
 * The full barrier before the store keeps earlier accesses from moving past it.
 */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	__sync_synchronize();
	*address = value;
}

/* Atomically subtracts one from *address and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __sync_sub_and_fetch(address, 1);
}

/* Atomically subtracts one from *address and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	return __sync_sub_and_fetch(address, 1);
}

/*
 * Decrements *value by one unless it is zero.
 * Returns true if the decrement happened, false if zero was observed.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t actual_value = *value;
	while (actual_value != 0) {
		const size_t new_value = actual_value - 1;
		const size_t expected_value = actual_value;
		/* The builtin returns the previous contents; equality with expected_value means the swap won. */
		actual_value = __sync_val_compare_and_swap(value, expected_value, new_value);
		if (actual_value == expected_value) {
			return true;
		}
	}
	return false;
}

/* Acquire fence, implemented with a full barrier. Declared with (void) so it is a full prototype. */
static inline void pthreadpool_fence_acquire(void) {
	__sync_synchronize();
}

/* Release fence, implemented with a full barrier. Declared with (void) so it is a full prototype. */
static inline void pthreadpool_fence_release(void) {
	__sync_synchronize();
}
354 #elif defined(_MSC_VER) && defined(_M_X64)
/* Storage types for MSVC on x86-64: plain volatile objects driven by compiler intrinsics. */
typedef volatile uint32_t pthreadpool_atomic_uint32_t;
typedef volatile size_t   pthreadpool_atomic_size_t;
typedef void *volatile    pthreadpool_atomic_void_p;

/* Returns the 32-bit value at address via a plain volatile read. */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return *address;
}

/* Returns the size_t value at address via a plain volatile read. */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return *address;
}

/* Returns the pointer stored at address via a plain volatile read. */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return *address;
}

/* Returns the 32-bit value at address with acquire ordering. */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	/* x86-64 loads always have acquire semantics; use only a compiler barrier */
	const uint32_t value = *address;
	_ReadBarrier();
	return value;
}

/* Returns the size_t value at address with acquire ordering. */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	/* x86-64 loads always have acquire semantics; use only a compiler barrier */
	const size_t value = *address;
	_ReadBarrier();
	return value;
}

/* Writes value to address via a plain volatile store. */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	*address = value;
}

/* Writes value to address via a plain volatile store. */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	*address = value;
}

/* Writes the pointer value to address via a plain volatile store. */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	*address = value;
}

/* Writes value to address with release ordering. */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	/* x86-64 stores always have release semantics; use only a compiler barrier */
	_WriteBarrier();
	*address = value;
}

/* Writes value to address with release ordering. */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	/* x86-64 stores always have release semantics; use only a compiler barrier */
	_WriteBarrier();
	*address = value;
}

/* Atomically subtracts one from *address and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) _InterlockedDecrement64((volatile __int64*) address);
}

/* Atomically subtracts one from *address and returns the new value (same intrinsic as the relaxed form). */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) _InterlockedDecrement64((volatile __int64*) address);
}

/*
 * Decrements *value by one unless it is zero.
 * Returns true if the decrement happened, false if zero was observed.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t actual_value = *value;
	while (actual_value != 0) {
		const size_t new_value = actual_value - 1;
		const size_t expected_value = actual_value;
		/* Arguments are (destination, exchange, comparand); the previous contents come back. */
		actual_value = (size_t) _InterlockedCompareExchange64(
			(volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
		if (actual_value == expected_value) {
			return true;
		}
	}
	return false;
}

/* Acquire fence: LFENCE followed by a compiler read barrier. Declared with (void) so it is a full prototype. */
static inline void pthreadpool_fence_acquire(void) {
	_mm_lfence();
	_ReadBarrier();
}

/* Release fence: compiler write barrier followed by SFENCE. Declared with (void) so it is a full prototype. */
static inline void pthreadpool_fence_release(void) {
	_WriteBarrier();
	_mm_sfence();
}
471 #elif defined(_MSC_VER) && defined(_M_IX86)
/* Storage types for MSVC on 32-bit x86: plain volatile objects driven by compiler intrinsics. */
typedef volatile uint32_t pthreadpool_atomic_uint32_t;
typedef volatile size_t   pthreadpool_atomic_size_t;
typedef void *volatile    pthreadpool_atomic_void_p;

/* Returns the 32-bit value at address via a plain volatile read. */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return *address;
}

/* Returns the size_t value at address via a plain volatile read. */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return *address;
}

/* Returns the pointer stored at address via a plain volatile read. */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return *address;
}

/* Returns the 32-bit value at address with acquire ordering. */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	/* x86 loads always have acquire semantics; use only a compiler barrier */
	const uint32_t value = *address;
	_ReadBarrier();
	return value;
}

/* Returns the size_t value at address with acquire ordering. */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	/* x86 loads always have acquire semantics; use only a compiler barrier */
	const size_t value = *address;
	_ReadBarrier();
	return value;
}

/* Writes value to address via a plain volatile store. */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	*address = value;
}

/* Writes value to address via a plain volatile store. */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	*address = value;
}

/* Writes the pointer value to address via a plain volatile store. */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	*address = value;
}

/* Writes value to address with release ordering. */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	/* x86 stores always have release semantics; use only a compiler barrier */
	_WriteBarrier();
	*address = value;
}

/* Writes value to address with release ordering. */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	/* x86 stores always have release semantics; use only a compiler barrier */
	_WriteBarrier();
	*address = value;
}

/* Atomically subtracts one from *address and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) _InterlockedDecrement((volatile long*) address);
}

/* Atomically subtracts one from *address and returns the new value (same intrinsic as the relaxed form). */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) _InterlockedDecrement((volatile long*) address);
}

/*
 * Decrements *value by one unless it is zero.
 * Returns true if the decrement happened, false if zero was observed.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t actual_value = *value;
	while (actual_value != 0) {
		const size_t new_value = actual_value - 1;
		const size_t expected_value = actual_value;
		/* Arguments are (destination, exchange, comparand); the previous contents come back. */
		actual_value = (size_t) _InterlockedCompareExchange(
			(volatile long*) value, (long) new_value, (long) expected_value);
		if (actual_value == expected_value) {
			return true;
		}
	}
	return false;
}

/*
 * Acquire fence: LFENCE plus a compiler read barrier, matching the x86-64 variant.
 * Declared with (void) so it is a full prototype.
 */
static inline void pthreadpool_fence_acquire(void) {
	_mm_lfence();
	_ReadBarrier();
}

/*
 * Release fence: compiler write barrier plus SFENCE, matching the x86-64 variant.
 * Declared with (void) so it is a full prototype.
 */
static inline void pthreadpool_fence_release(void) {
	_WriteBarrier();
	_mm_sfence();
}
586 #elif defined(_MSC_VER) && defined(_M_ARM64)
/* Storage types for MSVC on ARM64: plain volatile objects driven by compiler intrinsics. */
typedef volatile uint32_t pthreadpool_atomic_uint32_t;
typedef volatile size_t   pthreadpool_atomic_size_t;
typedef void *volatile    pthreadpool_atomic_void_p;

/* Returns the 32-bit value at address, read with __iso_volatile_load32. */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
}

/* Returns the size_t value at address, read as a 64-bit quantity with __iso_volatile_load64. */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) __iso_volatile_load64((const volatile __int64*) address);
}

/* Returns the pointer stored at address, read as a 64-bit quantity with __iso_volatile_load64. */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return (void*) __iso_volatile_load64((const volatile __int64*) address);
}

/* Returns the 32-bit value at address, read through the __ldar32 intrinsic. */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return (uint32_t) __ldar32((volatile unsigned __int32*) address);
}

/* Returns the size_t value at address, read through the __ldar64 intrinsic. */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) __ldar64((volatile unsigned __int64*) address);
}

/* Writes value to address with __iso_volatile_store32. */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	__iso_volatile_store32((volatile __int32*) address, (__int32) value);
}

/* Writes value to address as a 64-bit quantity with __iso_volatile_store64. */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	__iso_volatile_store64((volatile __int64*) address, (__int64) value);
}

/* Writes the pointer value to address as a 64-bit quantity with __iso_volatile_store64. */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	__iso_volatile_store64((volatile __int64*) address, (__int64) value);
}

/* Writes value to address through __stlr32, preceded by a compiler write barrier. */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	_WriteBarrier();
	__stlr32((unsigned __int32 volatile*) address, (unsigned __int32) value);
}

/* Writes value to address through __stlr64, preceded by a compiler write barrier. */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	_WriteBarrier();
	__stlr64((unsigned __int64 volatile*) address, (unsigned __int64) value);
}

/* Atomically subtracts one from *address with the _nf intrinsic and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) _InterlockedDecrement64_nf((volatile __int64*) address);
}

/* Atomically subtracts one from *address with the _rel intrinsic and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) _InterlockedDecrement64_rel((volatile __int64*) address);
}

/*
 * Decrements *value by one unless it is zero.
 * Returns true if the decrement happened, false if zero was observed.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t actual_value = (size_t) __iso_volatile_load64((const volatile __int64*) value);
	while (actual_value != 0) {
		const size_t new_value = actual_value - 1;
		const size_t expected_value = actual_value;
		/* Arguments are (destination, exchange, comparand); the previous contents come back. */
		actual_value = (size_t) _InterlockedCompareExchange64_nf(
			(volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
		if (actual_value == expected_value) {
			return true;
		}
	}
	return false;
}

/* Acquire fence: DMB ISHLD followed by a compiler read barrier. Declared with (void) so it is a full prototype. */
static inline void pthreadpool_fence_acquire(void) {
	__dmb(_ARM64_BARRIER_ISHLD);
	_ReadBarrier();
}

/* Release fence: compiler write barrier followed by DMB ISH. Declared with (void) so it is a full prototype. */
static inline void pthreadpool_fence_release(void) {
	_WriteBarrier();
	__dmb(_ARM64_BARRIER_ISH);
}
695 #elif defined(_MSC_VER) && defined(_M_ARM)
/* Storage types for MSVC on 32-bit ARM: plain volatile objects driven by compiler intrinsics. */
typedef volatile uint32_t pthreadpool_atomic_uint32_t;
typedef volatile size_t   pthreadpool_atomic_size_t;
typedef void *volatile    pthreadpool_atomic_void_p;

/* Returns the 32-bit value at address, read with __iso_volatile_load32. */
static inline uint32_t pthreadpool_load_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
}

/* Returns the size_t value at address, read as a 32-bit quantity with __iso_volatile_load32. */
static inline size_t pthreadpool_load_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) __iso_volatile_load32((const volatile __int32*) address);
}

/* Returns the pointer stored at address, read as a 32-bit quantity with __iso_volatile_load32. */
static inline void* pthreadpool_load_relaxed_void_p(
	pthreadpool_atomic_void_p* address)
{
	return (void*) __iso_volatile_load32((const volatile __int32*) address);
}

/* Returns the 32-bit value at address; the load is followed by DMB ISH and a compiler read barrier. */
static inline uint32_t pthreadpool_load_acquire_uint32_t(
	pthreadpool_atomic_uint32_t* address)
{
	const uint32_t value = (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
	__dmb(_ARM_BARRIER_ISH);
	_ReadBarrier();
	return value;
}

/* Returns the size_t value at address; the load is followed by DMB ISH and a compiler read barrier. */
static inline size_t pthreadpool_load_acquire_size_t(
	pthreadpool_atomic_size_t* address)
{
	const size_t value = (size_t) __iso_volatile_load32((const volatile __int32*) address);
	__dmb(_ARM_BARRIER_ISH);
	_ReadBarrier();
	return value;
}

/* Writes value to address with __iso_volatile_store32. */
static inline void pthreadpool_store_relaxed_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	__iso_volatile_store32((volatile __int32*) address, (__int32) value);
}

/* Writes value to address as a 32-bit quantity with __iso_volatile_store32. */
static inline void pthreadpool_store_relaxed_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	__iso_volatile_store32((volatile __int32*) address, (__int32) value);
}

/* Writes the pointer value to address as a 32-bit quantity with __iso_volatile_store32. */
static inline void pthreadpool_store_relaxed_void_p(
	pthreadpool_atomic_void_p* address,
	void* value)
{
	__iso_volatile_store32((volatile __int32*) address, (__int32) value);
}

/* Writes value to address; the store is preceded by a compiler write barrier and DMB ISH. */
static inline void pthreadpool_store_release_uint32_t(
	pthreadpool_atomic_uint32_t* address,
	uint32_t value)
{
	_WriteBarrier();
	__dmb(_ARM_BARRIER_ISH);
	__iso_volatile_store32((volatile __int32*) address, (__int32) value);
}

/* Writes value to address; the store is preceded by a compiler write barrier and DMB ISH. */
static inline void pthreadpool_store_release_size_t(
	pthreadpool_atomic_size_t* address,
	size_t value)
{
	_WriteBarrier();
	__dmb(_ARM_BARRIER_ISH);
	__iso_volatile_store32((volatile __int32*) address, (__int32) value);
}

/* Atomically subtracts one from *address with the _nf intrinsic and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) _InterlockedDecrement_nf((volatile long*) address);
}

/* Atomically subtracts one from *address with the _rel intrinsic and returns the new value. */
static inline size_t pthreadpool_decrement_fetch_release_size_t(
	pthreadpool_atomic_size_t* address)
{
	return (size_t) _InterlockedDecrement_rel((volatile long*) address);
}

/*
 * Decrements *value by one unless it is zero.
 * Returns true if the decrement happened, false if zero was observed.
 */
static inline bool pthreadpool_try_decrement_relaxed_size_t(
	pthreadpool_atomic_size_t* value)
{
	size_t actual_value = (size_t) __iso_volatile_load32((const volatile __int32*) value);
	while (actual_value != 0) {
		const size_t new_value = actual_value - 1;
		const size_t expected_value = actual_value;
		/* Arguments are (destination, exchange, comparand); the previous contents come back. */
		actual_value = (size_t) _InterlockedCompareExchange_nf(
			(volatile long*) value, (long) new_value, (long) expected_value);
		if (actual_value == expected_value) {
			return true;
		}
	}
	return false;
}

/* Acquire fence: DMB ISH followed by a compiler read barrier. Declared with (void) so it is a full prototype. */
static inline void pthreadpool_fence_acquire(void) {
	__dmb(_ARM_BARRIER_ISH);
	_ReadBarrier();
}

/* Release fence: compiler write barrier followed by DMB ISH. Declared with (void) so it is a full prototype. */
static inline void pthreadpool_fence_release(void) {
	_WriteBarrier();
	__dmb(_ARM_BARRIER_ISH);
}
812 #else
813 	#error "Platform-specific implementation of threadpool-atomics.h required"
814 #endif
815 
#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
/* Spin-wait hint for x86 targets: calls _mm_pause(). */
static inline void pthreadpool_yield(void) {
	_mm_pause();
}
#elif defined(__ARM_ACLE) || (defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)))
/* Spin-wait hint for ARM targets that provide the __yield() intrinsic. */
static inline void pthreadpool_yield(void) {
	__yield();
}
#elif defined(__GNUC__) && ((defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || ((defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6KZ__)) && !defined(__thumb__)))
/* Spin-wait hint for GNU-compatible ARM compilers: emits the "yield" instruction via inline assembly. */
static inline void pthreadpool_yield(void) {
	__asm__ __volatile__("yield");
}
#else
/* No dedicated hint is selected for this target; fall back to an acquire fence. */
static inline void pthreadpool_yield(void) {
	pthreadpool_fence_acquire();
}
#endif
833