1 /*-------------------------------------------------------------------------
2 *
3 * atomics.h
4 * Atomic operations.
5 *
6 * Hardware and compiler dependent functions for manipulating memory
7 * atomically and dealing with cache coherency. Used to implement locking
8 * facilities and lockless algorithms/data structures.
9 *
10 * To bring up postgres on a platform/compiler at the very least
11 * implementations for the following operations should be provided:
12 * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier()
13 * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32()
14 * * pg_atomic_test_set_flag(), pg_atomic_init_flag(), pg_atomic_clear_flag()
15 * * PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY should be defined if appropriate.
16 *
17 * There exist generic, hardware independent, implementations for several
18 * compilers which might be sufficient, although possibly not optimal, for a
19 * new platform. If no such generic implementation is available spinlocks (or
20 * even OS provided semaphores) will be used to implement the API.
21 *
22 * Implement _u64 atomics if and only if your platform can use them
23 * efficiently (and obviously correctly).
24 *
25 * Use higher level functionality (lwlocks, spinlocks, heavyweight locks)
26 * whenever possible. Writing correct code using these facilities is hard.
27 *
28 * For an introduction to using memory barriers within the PostgreSQL backend,
29 * see src/backend/storage/lmgr/README.barrier
30 *
31 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
32 * Portions Copyright (c) 1994, Regents of the University of California
33 *
34 * src/include/port/atomics.h
35 *
36 *-------------------------------------------------------------------------
37 */
38 #ifndef ATOMICS_H
39 #define ATOMICS_H
40
41 #ifdef FRONTEND
42 #error "atomics.h may not be included from frontend code"
43 #endif
44
45 #define INSIDE_ATOMICS_H
46
47 #include <limits.h>
48
/*
 * First a set of architecture specific files is included.
 *
 * These files can provide the full set of atomics or can do pretty much
 * nothing if all the compilers commonly used on these platforms provide
 * usable generics.
 *
 * Don't add inline assembly for the actual atomic operations if all the
 * common implementations of your platform provide intrinsics. Intrinsics are
 * much easier to understand and potentially support more architectures.
 *
 * It will often make sense to define memory barrier semantics here, since
 * e.g. generic compiler intrinsics for x86 memory barriers can't know that
 * postgres doesn't need x86 read/write barriers to do anything more than a
 * compiler barrier.
 *
 */
66 #if defined(__arm__) || defined(__arm) || \
67 defined(__aarch64__) || defined(__aarch64)
68 #include "port/atomics/arch-arm.h"
69 #elif defined(__i386__) || defined(__i386) || defined(__x86_64__)
70 #include "port/atomics/arch-x86.h"
71 #elif defined(__ia64__) || defined(__ia64)
72 #include "port/atomics/arch-ia64.h"
73 #elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
74 #include "port/atomics/arch-ppc.h"
75 #elif defined(__hppa) || defined(__hppa__)
76 #include "port/atomics/arch-hppa.h"
77 #endif
78
/*
 * Compiler specific, but architecture independent implementations.
 *
 * Provide architecture independent implementations of the atomic
 * facilities. At the very least compiler barriers should be provided, but a
 * full implementation of
 * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier()
 * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32()
 * using compiler intrinsics is a good idea.
 */
89 /*
90 * Given a gcc-compatible xlc compiler, prefer the xlc implementation. The
91 * ppc64le "IBM XL C/C++ for Linux, V13.1.2" implements both interfaces, but
92 * __sync_lock_test_and_set() of one-byte types elicits SIGSEGV.
93 */
94 #if defined(__IBMC__) || defined(__IBMCPP__)
95 #include "port/atomics/generic-xlc.h"
96 /* gcc or compatible, including clang and icc */
97 #elif defined(__GNUC__) || defined(__INTEL_COMPILER)
98 #include "port/atomics/generic-gcc.h"
99 #elif defined(_MSC_VER)
100 #include "port/atomics/generic-msvc.h"
101 #elif defined(__hpux) && defined(__ia64) && !defined(__GNUC__)
102 #include "port/atomics/generic-acc.h"
103 #elif defined(__SUNPRO_C) && !defined(__GNUC__)
104 #include "port/atomics/generic-sunpro.h"
105 #else
106 /*
107 * Unsupported compiler, we'll likely use slower fallbacks... At least
108 * compiler barriers should really be provided.
109 */
110 #endif
111
112 /*
113 * Provide a full fallback of the pg_*_barrier(), pg_atomic**_flag and
114 * pg_atomic_* APIs for platforms without sufficient spinlock and/or atomics
115 * support. In the case of spinlock backed atomics the emulation is expected
116 * to be efficient, although less so than native atomics support.
117 */
118 #include "port/atomics/fallback.h"
119
120 /*
121 * Provide additional operations using supported infrastructure. These are
122 * expected to be efficient if the underlying atomic operations are efficient.
123 */
124 #include "port/atomics/generic.h"
125
126
/*
 * pg_compiler_barrier - prevent the compiler from moving code across the
 * barrier
 *
 * A compiler barrier need not (and preferably should not) emit any actual
 * machine code, but must act as an optimization fence: the compiler must not
 * reorder loads or stores to main memory around the barrier.  However, the
 * CPU may still reorder loads or stores at runtime, if the architecture's
 * memory model permits this.
 *
 * Expands to pg_compiler_barrier_impl(), which is not defined in this file
 * (presumably supplied by one of the port/atomics headers included above).
 */
#define pg_compiler_barrier()	pg_compiler_barrier_impl()
137
/*
 * pg_memory_barrier - prevent the CPU from reordering memory access
 *
 * A memory barrier must act as a compiler barrier, and in addition must
 * guarantee that all loads and stores issued prior to the barrier are
 * completed before any loads or stores issued after the barrier.  Unless
 * loads and stores are totally ordered (which is not the case on most
 * architectures) this requires issuing some sort of memory fencing
 * instruction.
 *
 * Expands to pg_memory_barrier_impl(), which is not defined in this file
 * (presumably supplied by one of the port/atomics headers included above).
 */
#define pg_memory_barrier() pg_memory_barrier_impl()
149
/*
 * pg_(read|write)_barrier - prevent the CPU from reordering memory access
 *
 * A read barrier must act as a compiler barrier, and in addition must
 * guarantee that any loads issued prior to the barrier are completed before
 * any loads issued after the barrier.  Similarly, a write barrier acts
 * as a compiler barrier, and also orders stores.  Read and write barriers
 * are thus weaker than a full memory barrier, but stronger than a compiler
 * barrier.  In practice, on machines with strong memory ordering, read and
 * write barriers may require nothing more than a compiler barrier.
 *
 * Both macros simply expand to their respective *_impl() counterparts, which
 * are not defined in this file.
 */
#define pg_read_barrier()	pg_read_barrier_impl()
#define pg_write_barrier()	pg_write_barrier_impl()
163
/*
 * pg_spin_delay - allow the CPU to relax in busy loops
 *
 * Expands to pg_spin_delay_impl(), which is not defined in this file.
 */
#define pg_spin_delay() pg_spin_delay_impl()
168
169 /*
170 * pg_atomic_init_flag - initialize atomic flag.
171 *
172 * No barrier semantics.
173 */
174 static inline void
pg_atomic_init_flag(volatile pg_atomic_flag * ptr)175 pg_atomic_init_flag(volatile pg_atomic_flag *ptr)
176 {
177 pg_atomic_init_flag_impl(ptr);
178 }
179
180 /*
181 * pg_atomic_test_and_set_flag - TAS()
182 *
183 * Returns true if the flag has successfully been set, false otherwise.
184 *
185 * Acquire (including read barrier) semantics.
186 */
187 static inline bool
pg_atomic_test_set_flag(volatile pg_atomic_flag * ptr)188 pg_atomic_test_set_flag(volatile pg_atomic_flag *ptr)
189 {
190 return pg_atomic_test_set_flag_impl(ptr);
191 }
192
193 /*
194 * pg_atomic_unlocked_test_flag - Check if the lock is free
195 *
196 * Returns true if the flag currently is not set, false otherwise.
197 *
198 * No barrier semantics.
199 */
200 static inline bool
pg_atomic_unlocked_test_flag(volatile pg_atomic_flag * ptr)201 pg_atomic_unlocked_test_flag(volatile pg_atomic_flag *ptr)
202 {
203 return pg_atomic_unlocked_test_flag_impl(ptr);
204 }
205
206 /*
207 * pg_atomic_clear_flag - release lock set by TAS()
208 *
209 * Release (including write barrier) semantics.
210 */
211 static inline void
pg_atomic_clear_flag(volatile pg_atomic_flag * ptr)212 pg_atomic_clear_flag(volatile pg_atomic_flag *ptr)
213 {
214 pg_atomic_clear_flag_impl(ptr);
215 }
216
217
218 /*
219 * pg_atomic_init_u32 - initialize atomic variable
220 *
221 * Has to be done before any concurrent usage..
222 *
223 * No barrier semantics.
224 */
225 static inline void
pg_atomic_init_u32(volatile pg_atomic_uint32 * ptr,uint32 val)226 pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
227 {
228 AssertPointerAlignment(ptr, 4);
229
230 pg_atomic_init_u32_impl(ptr, val);
231 }
232
233 /*
234 * pg_atomic_read_u32 - unlocked read from atomic variable.
235 *
236 * The read is guaranteed to return a value as it has been written by this or
237 * another process at some point in the past. There's however no cache
238 * coherency interaction guaranteeing the value hasn't since been written to
239 * again.
240 *
241 * No barrier semantics.
242 */
243 static inline uint32
pg_atomic_read_u32(volatile pg_atomic_uint32 * ptr)244 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
245 {
246 AssertPointerAlignment(ptr, 4);
247 return pg_atomic_read_u32_impl(ptr);
248 }
249
250 /*
251 * pg_atomic_write_u32 - write to atomic variable.
252 *
253 * The write is guaranteed to succeed as a whole, i.e. it's not possible to
254 * observe a partial write for any reader. Note that this correctly interacts
255 * with pg_atomic_compare_exchange_u32, in contrast to
256 * pg_atomic_unlocked_write_u32().
257 *
258 * No barrier semantics.
259 */
260 static inline void
pg_atomic_write_u32(volatile pg_atomic_uint32 * ptr,uint32 val)261 pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
262 {
263 AssertPointerAlignment(ptr, 4);
264
265 pg_atomic_write_u32_impl(ptr, val);
266 }
267
268 /*
269 * pg_atomic_unlocked_write_u32 - unlocked write to atomic variable.
270 *
271 * The write is guaranteed to succeed as a whole, i.e. it's not possible to
272 * observe a partial write for any reader. But note that writing this way is
273 * not guaranteed to correctly interact with read-modify-write operations like
274 * pg_atomic_compare_exchange_u32. This should only be used in cases where
275 * minor performance regressions due to atomics emulation are unacceptable.
276 *
277 * No barrier semantics.
278 */
279 static inline void
pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 * ptr,uint32 val)280 pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
281 {
282 AssertPointerAlignment(ptr, 4);
283
284 pg_atomic_unlocked_write_u32_impl(ptr, val);
285 }
286
287 /*
288 * pg_atomic_exchange_u32 - exchange newval with current value
289 *
290 * Returns the old value of 'ptr' before the swap.
291 *
292 * Full barrier semantics.
293 */
294 static inline uint32
pg_atomic_exchange_u32(volatile pg_atomic_uint32 * ptr,uint32 newval)295 pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
296 {
297 AssertPointerAlignment(ptr, 4);
298
299 return pg_atomic_exchange_u32_impl(ptr, newval);
300 }
301
302 /*
303 * pg_atomic_compare_exchange_u32 - CAS operation
304 *
305 * Atomically compare the current value of ptr with *expected and store newval
306 * iff ptr and *expected have the same value. The current value of *ptr will
307 * always be stored in *expected.
308 *
309 * Return true if values have been exchanged, false otherwise.
310 *
311 * Full barrier semantics.
312 */
313 static inline bool
pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 * ptr,uint32 * expected,uint32 newval)314 pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr,
315 uint32 *expected, uint32 newval)
316 {
317 AssertPointerAlignment(ptr, 4);
318 AssertPointerAlignment(expected, 4);
319
320 return pg_atomic_compare_exchange_u32_impl(ptr, expected, newval);
321 }
322
323 /*
324 * pg_atomic_fetch_add_u32 - atomically add to variable
325 *
326 * Returns the value of ptr before the arithmetic operation.
327 *
328 * Full barrier semantics.
329 */
330 static inline uint32
pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 * ptr,int32 add_)331 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
332 {
333 AssertPointerAlignment(ptr, 4);
334 return pg_atomic_fetch_add_u32_impl(ptr, add_);
335 }
336
337 /*
338 * pg_atomic_fetch_sub_u32 - atomically subtract from variable
339 *
340 * Returns the value of ptr before the arithmetic operation. Note that sub_
341 * may not be INT_MIN due to platform limitations.
342 *
343 * Full barrier semantics.
344 */
345 static inline uint32
pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32 * ptr,int32 sub_)346 pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
347 {
348 AssertPointerAlignment(ptr, 4);
349 Assert(sub_ != INT_MIN);
350 return pg_atomic_fetch_sub_u32_impl(ptr, sub_);
351 }
352
353 /*
354 * pg_atomic_fetch_and_u32 - atomically bit-and and_ with variable
355 *
356 * Returns the value of ptr before the arithmetic operation.
357 *
358 * Full barrier semantics.
359 */
360 static inline uint32
pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 * ptr,uint32 and_)361 pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 *ptr, uint32 and_)
362 {
363 AssertPointerAlignment(ptr, 4);
364 return pg_atomic_fetch_and_u32_impl(ptr, and_);
365 }
366
367 /*
368 * pg_atomic_fetch_or_u32 - atomically bit-or or_ with variable
369 *
370 * Returns the value of ptr before the arithmetic operation.
371 *
372 * Full barrier semantics.
373 */
374 static inline uint32
pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 * ptr,uint32 or_)375 pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
376 {
377 AssertPointerAlignment(ptr, 4);
378 return pg_atomic_fetch_or_u32_impl(ptr, or_);
379 }
380
381 /*
382 * pg_atomic_add_fetch_u32 - atomically add to variable
383 *
384 * Returns the value of ptr after the arithmetic operation.
385 *
386 * Full barrier semantics.
387 */
388 static inline uint32
pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 * ptr,int32 add_)389 pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
390 {
391 AssertPointerAlignment(ptr, 4);
392 return pg_atomic_add_fetch_u32_impl(ptr, add_);
393 }
394
395 /*
396 * pg_atomic_sub_fetch_u32 - atomically subtract from variable
397 *
398 * Returns the value of ptr after the arithmetic operation. Note that sub_ may
399 * not be INT_MIN due to platform limitations.
400 *
401 * Full barrier semantics.
402 */
403 static inline uint32
pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 * ptr,int32 sub_)404 pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
405 {
406 AssertPointerAlignment(ptr, 4);
407 Assert(sub_ != INT_MIN);
408 return pg_atomic_sub_fetch_u32_impl(ptr, sub_);
409 }
410
411 /* ----
412 * The 64 bit operations have the same semantics as their 32bit counterparts
413 * if they are available. Check the corresponding 32bit function for
414 * documentation.
415 * ----
416 */
417 static inline void
pg_atomic_init_u64(volatile pg_atomic_uint64 * ptr,uint64 val)418 pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
419 {
420 /*
421 * Can't necessarily enforce alignment - and don't need it - when using
422 * the spinlock based fallback implementation. Therefore only assert when
423 * not using it.
424 */
425 #ifndef PG_HAVE_ATOMIC_U64_SIMULATION
426 AssertPointerAlignment(ptr, 8);
427 #endif
428 pg_atomic_init_u64_impl(ptr, val);
429 }
430
431 static inline uint64
pg_atomic_read_u64(volatile pg_atomic_uint64 * ptr)432 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
433 {
434 #ifndef PG_HAVE_ATOMIC_U64_SIMULATION
435 AssertPointerAlignment(ptr, 8);
436 #endif
437 return pg_atomic_read_u64_impl(ptr);
438 }
439
440 static inline void
pg_atomic_write_u64(volatile pg_atomic_uint64 * ptr,uint64 val)441 pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
442 {
443 #ifndef PG_HAVE_ATOMIC_U64_SIMULATION
444 AssertPointerAlignment(ptr, 8);
445 #endif
446 pg_atomic_write_u64_impl(ptr, val);
447 }
448
449 static inline uint64
pg_atomic_exchange_u64(volatile pg_atomic_uint64 * ptr,uint64 newval)450 pg_atomic_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 newval)
451 {
452 #ifndef PG_HAVE_ATOMIC_U64_SIMULATION
453 AssertPointerAlignment(ptr, 8);
454 #endif
455 return pg_atomic_exchange_u64_impl(ptr, newval);
456 }
457
458 static inline bool
pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 * ptr,uint64 * expected,uint64 newval)459 pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr,
460 uint64 *expected, uint64 newval)
461 {
462 #ifndef PG_HAVE_ATOMIC_U64_SIMULATION
463 AssertPointerAlignment(ptr, 8);
464 AssertPointerAlignment(expected, 8);
465 #endif
466 return pg_atomic_compare_exchange_u64_impl(ptr, expected, newval);
467 }
468
469 static inline uint64
pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 * ptr,int64 add_)470 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
471 {
472 #ifndef PG_HAVE_ATOMIC_U64_SIMULATION
473 AssertPointerAlignment(ptr, 8);
474 #endif
475 return pg_atomic_fetch_add_u64_impl(ptr, add_);
476 }
477
478 static inline uint64
pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 * ptr,int64 sub_)479 pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, int64 sub_)
480 {
481 #ifndef PG_HAVE_ATOMIC_U64_SIMULATION
482 AssertPointerAlignment(ptr, 8);
483 #endif
484 Assert(sub_ != PG_INT64_MIN);
485 return pg_atomic_fetch_sub_u64_impl(ptr, sub_);
486 }
487
488 static inline uint64
pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 * ptr,uint64 and_)489 pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_)
490 {
491 #ifndef PG_HAVE_ATOMIC_U64_SIMULATION
492 AssertPointerAlignment(ptr, 8);
493 #endif
494 return pg_atomic_fetch_and_u64_impl(ptr, and_);
495 }
496
497 static inline uint64
pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 * ptr,uint64 or_)498 pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_)
499 {
500 #ifndef PG_HAVE_ATOMIC_U64_SIMULATION
501 AssertPointerAlignment(ptr, 8);
502 #endif
503 return pg_atomic_fetch_or_u64_impl(ptr, or_);
504 }
505
506 static inline uint64
pg_atomic_add_fetch_u64(volatile pg_atomic_uint64 * ptr,int64 add_)507 pg_atomic_add_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
508 {
509 #ifndef PG_HAVE_ATOMIC_U64_SIMULATION
510 AssertPointerAlignment(ptr, 8);
511 #endif
512 return pg_atomic_add_fetch_u64_impl(ptr, add_);
513 }
514
515 static inline uint64
pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 * ptr,int64 sub_)516 pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 sub_)
517 {
518 #ifndef PG_HAVE_ATOMIC_U64_SIMULATION
519 AssertPointerAlignment(ptr, 8);
520 #endif
521 Assert(sub_ != PG_INT64_MIN);
522 return pg_atomic_sub_fetch_u64_impl(ptr, sub_);
523 }
524
525 #undef INSIDE_ATOMICS_H
526
527 #endif /* ATOMICS_H */
528