xref: /freebsd/share/man/man9/atomic.9 (revision 0957b409)
1.\" Copyright (c) 2000-2001 John H. Baldwin <jhb@FreeBSD.org>
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR
14.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
15.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
16.\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
17.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
18.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
19.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
20.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
22.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23.\"
24.\" $FreeBSD$
25.\"
26.Dd December 22, 2017
27.Dt ATOMIC 9
28.Os
29.Sh NAME
30.Nm atomic_add ,
31.Nm atomic_clear ,
32.Nm atomic_cmpset ,
33.Nm atomic_fcmpset ,
34.Nm atomic_fetchadd ,
35.Nm atomic_load ,
36.Nm atomic_readandclear ,
37.Nm atomic_set ,
38.Nm atomic_subtract ,
39.Nm atomic_store ,
40.Nm atomic_thread_fence
41.Nd atomic operations
42.Sh SYNOPSIS
43.In sys/types.h
44.In machine/atomic.h
45.Ft void
46.Fn atomic_add_[acq_|rel_]<type> "volatile <type> *p" "<type> v"
47.Ft void
48.Fn atomic_clear_[acq_|rel_]<type> "volatile <type> *p" "<type> v"
49.Ft int
50.Fo atomic_cmpset_[acq_|rel_]<type>
51.Fa "volatile <type> *dst"
52.Fa "<type> old"
53.Fa "<type> new"
54.Fc
55.Ft int
56.Fo atomic_fcmpset_[acq_|rel_]<type>
57.Fa "volatile <type> *dst"
58.Fa "<type> *old"
59.Fa "<type> new"
60.Fc
61.Ft <type>
62.Fn atomic_fetchadd_<type> "volatile <type> *p" "<type> v"
63.Ft <type>
64.Fn atomic_load_[acq_]<type> "volatile <type> *p"
65.Ft <type>
66.Fn atomic_readandclear_<type> "volatile <type> *p"
67.Ft void
68.Fn atomic_set_[acq_|rel_]<type> "volatile <type> *p" "<type> v"
69.Ft void
70.Fn atomic_subtract_[acq_|rel_]<type> "volatile <type> *p" "<type> v"
71.Ft void
72.Fn atomic_store_[rel_]<type> "volatile <type> *p" "<type> v"
73.Ft <type>
74.Fn atomic_swap_<type> "volatile <type> *p" "<type> v"
75.Ft int
76.Fn atomic_testandclear_<type> "volatile <type> *p" "u_int v"
77.Ft int
78.Fn atomic_testandset_<type> "volatile <type> *p" "u_int v"
79.Ft void
80.Fn atomic_thread_fence_[acq|acq_rel|rel|seq_cst] "void"
81.Sh DESCRIPTION
82Atomic operations are commonly used to implement reference counts and as
83building blocks for synchronization primitives, such as mutexes.
84.Pp
85All of these operations are performed
86.Em atomically
87across multiple threads and in the presence of interrupts, meaning that they
88are performed in an indivisible manner from the perspective of concurrently
89running threads and interrupt handlers.
90.Pp
91On all architectures supported by
92.Fx ,
93ordinary loads and stores of integers in cache-coherent memory are
94inherently atomic if the integer is naturally aligned and its size does not
95exceed the processor's word size.
96However, such loads and stores may be elided from the program by
97the compiler, whereas atomic operations are always performed.
98.Pp
99When atomic operations are performed on cache-coherent memory, all
100operations on the same location are totally ordered.
101.Pp
102When an atomic load is performed on a location in cache-coherent memory,
103it reads the entire value that was defined by the last atomic store to
104each byte of the location.
105An atomic load will never return a value out of thin air.
106When an atomic store is performed on a location, no other thread or
107interrupt handler will observe a
108.Em torn write ,
109or partial modification of the location.
110.Pp
111Except as noted below, the semantics of these operations are almost
112identical to the semantics of similarly named C11 atomic operations.
113.Ss Types
114Most atomic operations act upon a specific
115.Fa type .
116That type is indicated in the function name.
117In contrast to C11 atomic operations,
118.Fx Ns 's
119atomic operations are performed on ordinary integer types.
120The available types are:
121.Pp
122.Bl -tag -offset indent -width short -compact
123.It Li int
124unsigned integer
125.It Li long
126unsigned long integer
127.It Li ptr
128unsigned integer the size of a pointer
129.It Li 32
130unsigned 32-bit integer
131.It Li 64
132unsigned 64-bit integer
133.El
134.Pp
135For example, the function to atomically add two integers is called
136.Fn atomic_add_int .
137.Pp
138Certain architectures also provide operations for types smaller than
139.Dq Li int .
140.Pp
141.Bl -tag -offset indent -width short -compact
142.It Li char
143unsigned character
144.It Li short
145unsigned short integer
146.It Li 8
147unsigned 8-bit integer
148.It Li 16
149unsigned 16-bit integer
150.El
151.Pp
152These types must not be used in machine-independent code.
153.Ss Acquire and Release Operations
154By default, a thread's accesses to different memory locations might not be
155performed in
156.Em program order ,
157that is, the order in which the accesses appear in the source code.
158To optimize the program's execution, both the compiler and processor might
159reorder the thread's accesses.
160However, both ensure that their reordering of the accesses is not visible to
161the thread.
162Otherwise, the traditional memory model that is expected by single-threaded
163programs would be violated.
164Nonetheless, other threads in a multithreaded program, such as the
165.Fx
166kernel, might observe the reordering.
167Moreover, in some cases, such as the implementation of synchronization between
168threads, arbitrary reordering might result in the incorrect execution of the
169program.
170To constrain the reordering that both the compiler and processor might perform
171on a thread's accesses, a programmer can use atomic operations with
172.Em acquire
173and
174.Em release
175semantics.
176.Pp
177Atomic operations on memory have up to three variants.
178The first, or
179.Em relaxed
180variant, performs the operation without imposing any ordering constraints on
181accesses to other memory locations.
182This variant is the default.
183The second variant has acquire semantics, and the third variant has release
184semantics.
185.Pp
186When an atomic operation has acquire semantics, the operation must have
187completed before any subsequent load or store (by program order) is
188performed.
189Conversely, acquire semantics do not require that prior loads or stores have
190completed before the atomic operation is performed.
191An atomic operation can only have acquire semantics if it performs a load
192from memory.
193To denote acquire semantics, the suffix
194.Dq Li _acq
195is inserted into the function name immediately prior to the
196.Dq Li _ Ns Aq Fa type
197suffix.
198For example, to subtract two integers ensuring that the subtraction is
199completed before any subsequent loads and stores are performed, use
200.Fn atomic_subtract_acq_int .
201.Pp
202When an atomic operation has release semantics, all prior loads or stores
203(by program order) must have completed before the operation is performed.
204Conversely, release semantics do not require that the atomic operation must
205have completed before any subsequent load or store is performed.
206An atomic operation can only have release semantics if it performs a store
207to memory.
208To denote release semantics, the suffix
209.Dq Li _rel
210is inserted into the function name immediately prior to the
211.Dq Li _ Ns Aq Fa type
212suffix.
213For example, to add two long integers ensuring that all prior loads and
214stores are completed before the addition is performed, use
215.Fn atomic_add_rel_long .
216.Pp
217When a release operation by one thread
218.Em synchronizes with
219an acquire operation by another thread, usually meaning that the acquire
220operation reads the value written by the release operation, then the effects
221of all prior stores by the releasing thread must become visible to
222subsequent loads by the acquiring thread.
223Moreover, the effects of all stores (by other threads) that were visible to
224the releasing thread must also become visible to the acquiring thread.
225These rules only apply to the synchronizing threads.
226Other threads might observe these stores in a different order.
227.Pp
228In effect, atomic operations with acquire and release semantics establish
229one-way barriers to reordering that enable the implementations of
230synchronization primitives to express their ordering requirements without
231also imposing unnecessary ordering.
232For example, for a critical section guarded by a mutex, an acquire operation
233when the mutex is locked and a release operation when the mutex is unlocked
234will prevent any loads or stores from moving outside of the critical
235section.
236However, they will not prevent the compiler or processor from moving loads
237or stores into the critical section, which does not violate the semantics of
238a mutex.
239.Ss Thread Fence Operations
240Alternatively, a programmer can use atomic thread fence operations to
241constrain the reordering of accesses.
242In contrast to other atomic operations, fences do not, themselves, access
243memory.
244.Pp
245When a fence has acquire semantics, all prior loads (by program order) must
246have completed before any subsequent load or store is performed.
247Thus, an acquire fence is a two-way barrier for load operations.
248To denote acquire semantics, the suffix
249.Dq Li _acq
250is appended to the function name, for example,
251.Fn atomic_thread_fence_acq .
252.Pp
253When a fence has release semantics, all prior loads or stores (by program
254order) must have completed before any subsequent store operation is
255performed.
256Thus, a release fence is a two-way barrier for store operations.
257To denote release semantics, the suffix
258.Dq Li _rel
259is appended to the function name, for example,
260.Fn atomic_thread_fence_rel .
261.Pp
262Although
263.Fn atomic_thread_fence_acq_rel
264implements both acquire and release semantics, it is not a full barrier.
265For example, a store prior to the fence (in program order) may be completed
266after a load subsequent to the fence.
267In contrast,
268.Fn atomic_thread_fence_seq_cst
269implements a full barrier.
270Neither loads nor stores may cross this barrier in either direction.
271.Pp
272In C11, a release fence by one thread synchronizes with an acquire fence by
273another thread when an atomic load that is prior to the acquire fence (by
274program order) reads the value written by an atomic store that is subsequent
275to the release fence.
276In contrast, in FreeBSD, because of the atomicity of ordinary, naturally
277aligned loads and stores, fences can also be synchronized by ordinary loads
278and stores.
279This simplifies the implementation and use of some synchronization
280primitives in
281.Fx .
282.Pp
283Since neither a compiler nor a processor can foresee which (atomic) load
284will read the value written by an (atomic) store, the ordering constraints
285imposed by fences must be more restrictive than acquire loads and release
286stores.
287Essentially, this is why fences are two-way barriers.
288.Pp
289Although fences impose more restrictive ordering than acquire loads and
290release stores, by separating access from ordering, they can sometimes
291facilitate more efficient implementations of synchronization primitives.
292For example, they can be used to avoid executing a memory barrier until a
293memory access shows that some condition is satisfied.
294.Ss Multiple Processors
295In multiprocessor systems, the atomicity of the atomic operations on memory
296depends on support for cache coherence in the underlying architecture.
297In general, cache coherence on the default memory type,
298.Dv VM_MEMATTR_DEFAULT ,
299is guaranteed by all architectures that are supported by
300.Fx .
301For example, cache coherence is guaranteed on write-back memory by the
302.Tn amd64
303and
304.Tn i386
305architectures.
306However, on some architectures, cache coherence might not be enabled on all
307memory types.
308To determine if cache coherence is enabled for a non-default memory type,
309consult the architecture's documentation.
310.Ss Semantics
311This section describes the semantics of each operation using a C like notation.
312.Bl -hang
313.It Fn atomic_add p v
314.Bd -literal -compact
315*p += v;
316.Ed
317.It Fn atomic_clear p v
318.Bd -literal -compact
319*p &= ~v;
320.Ed
321.It Fn atomic_cmpset dst old new
322.Bd -literal -compact
323if (*dst == old) {
324	*dst = new;
325	return (1);
326} else
327	return (0);
328.Ed
329.El
330.Pp
331Some architectures do not implement the
332.Fn atomic_cmpset
333functions for the types
334.Dq Li char ,
335.Dq Li short ,
336.Dq Li 8 ,
337and
338.Dq Li 16 .
339.Bl -hang
340.It Fn atomic_fcmpset dst *old new
341.El
342.Pp
343On architectures implementing
344.Em Compare And Swap
345operation in hardware, the functionality can be described as
346.Bd -literal -offset indent -compact
347if (*dst == *old) {
348	*dst = new;
349	return (1);
350} else {
351	*old = *dst;
352	return (0);
353}
354.Ed
355On architectures which provide a
356.Em Load Linked/Store Conditional
357primitive, the write to
358.Dv *dst
359might also fail for several reasons, the most important of which
360is a parallel write to
361.Dv *dst
362cache line by another CPU.
363In this case
364.Fn atomic_fcmpset
365function also returns
366.Dv false ,
367despite
368.Dl *old == *dst .
369.Pp
370Some architectures do not implement the
371.Fn atomic_fcmpset
372functions for the types
373.Dq Li char ,
374.Dq Li short ,
375.Dq Li 8 ,
376and
377.Dq Li 16 .
378.Bl -hang
379.It Fn atomic_fetchadd p v
380.Bd -literal -compact
381tmp = *p;
382*p += v;
383return (tmp);
384.Ed
385.El
386.Pp
387The
388.Fn atomic_fetchadd
389functions are only implemented for the types
390.Dq Li int ,
391.Dq Li long
392and
393.Dq Li 32
394and do not have any variants with memory barriers at this time.
395.Bl -hang
396.It Fn atomic_load p
397.Bd -literal -compact
398return (*p);
399.Ed
400.It Fn atomic_readandclear p
401.Bd -literal -compact
402tmp = *p;
403*p = 0;
404return (tmp);
405.Ed
406.El
407.Pp
408The
409.Fn atomic_readandclear
410functions are not implemented for the types
411.Dq Li char ,
412.Dq Li short ,
413.Dq Li ptr ,
414.Dq Li 8 ,
415and
416.Dq Li 16
417and do not have any variants with memory barriers at this time.
418.Bl -hang
419.It Fn atomic_set p v
420.Bd -literal -compact
421*p |= v;
422.Ed
423.It Fn atomic_subtract p v
424.Bd -literal -compact
425*p -= v;
426.Ed
427.It Fn atomic_store p v
428.Bd -literal -compact
429*p = v;
430.Ed
431.It Fn atomic_swap p v
432.Bd -literal -compact
433tmp = *p;
434*p = v;
435return (tmp);
436.Ed
437.El
438.Pp
439The
440.Fn atomic_swap
441functions are not implemented for the types
442.Dq Li char ,
443.Dq Li short ,
444.Dq Li ptr ,
445.Dq Li 8 ,
446and
447.Dq Li 16
448and do not have any variants with memory barriers at this time.
449.Bl -hang
450.It Fn atomic_testandclear p v
451.Bd -literal -compact
452bit = 1 << (v % (sizeof(*p) * NBBY));
453tmp = (*p & bit) != 0;
454*p &= ~bit;
455return (tmp);
456.Ed
457.El
458.Bl -hang
459.It Fn atomic_testandset p v
460.Bd -literal -compact
461bit = 1 << (v % (sizeof(*p) * NBBY));
462tmp = (*p & bit) != 0;
463*p |= bit;
464return (tmp);
465.Ed
466.El
467.Pp
468The
469.Fn atomic_testandset
470and
471.Fn atomic_testandclear
472functions are only implemented for the types
473.Dq Li int ,
474.Dq Li long
475and
476.Dq Li 32
477and do not have any variants with memory barriers at this time.
478.Pp
479The type
480.Dq Li 64
481is currently not implemented for any of the atomic operations on the
482.Tn arm ,
483.Tn i386 ,
484and
485.Tn powerpc
486architectures.
487.Sh RETURN VALUES
488The
489.Fn atomic_cmpset
490function returns the result of the compare operation.
491The
492.Fn atomic_fcmpset
493function returns
494.Dv true
495if the operation succeeded.
496Otherwise it returns
497.Dv false
498and sets
499.Va *old
500to the found value.
501The
502.Fn atomic_fetchadd ,
503.Fn atomic_load ,
504.Fn atomic_readandclear ,
505and
506.Fn atomic_swap
507functions return the value at the specified address.
508The
509.Fn atomic_testandset
510and
511.Fn atomic_testandclear
512functions return the result of the test operation.
513.Sh EXAMPLES
514This example uses the
515.Fn atomic_cmpset_acq_ptr
516and
517.Fn atomic_set_ptr
518functions to obtain a sleep mutex and handle recursion.
519Since the
520.Va mtx_lock
521member of a
522.Vt "struct mtx"
523is a pointer, the
524.Dq Li ptr
525type is used.
526.Bd -literal
527/* Try to obtain mtx_lock once. */
528#define _obtain_lock(mp, tid)						\\
529	atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid))
530
531/* Get a sleep lock, deal with recursion inline. */
532#define _get_sleep_lock(mp, tid, opts, file, line) do {			\\
533	uintptr_t _tid = (uintptr_t)(tid);				\\
534									\\
535	if (!_obtain_lock(mp, tid)) {					\\
536		if (((mp)->mtx_lock & MTX_FLAGMASK) != _tid)		\\
537			_mtx_lock_sleep((mp), _tid, (opts), (file), (line));\\
538		else {							\\
539			atomic_set_ptr(&(mp)->mtx_lock, MTX_RECURSE);	\\
540			(mp)->mtx_recurse++;				\\
541		}							\\
542	}								\\
543} while (0)
544.Ed
545.Sh HISTORY
546The
547.Fn atomic_add ,
548.Fn atomic_clear ,
549.Fn atomic_set ,
550and
551.Fn atomic_subtract
552operations were introduced in
553.Fx 3.0 .
554Initially, these operations were defined on the types
555.Dq Li char ,
556.Dq Li short ,
557.Dq Li int ,
558and
559.Dq Li long .
560.Pp
561The
562.Fn atomic_cmpset ,
563.Fn atomic_load_acq ,
564.Fn atomic_readandclear ,
565and
566.Fn atomic_store_rel
567operations were added in
568.Fx 5.0 .
569Simultaneously, the acquire and release variants were introduced, and
570support was added for operation on the types
571.Dq Li 8 ,
572.Dq Li 16 ,
573.Dq Li 32 ,
574.Dq Li 64 ,
575and
576.Dq Li ptr .
577.Pp
578The
579.Fn atomic_fetchadd
580operation was added in
581.Fx 6.0 .
582.Pp
583The
584.Fn atomic_swap
585and
586.Fn atomic_testandset
587operations were added in
588.Fx 10.0 .
589.Pp
590The
591.Fn atomic_testandclear
592and
593.Fn atomic_thread_fence
594operations were added in
595.Fx 11.0 .
596.Pp
597The relaxed variants of
598.Fn atomic_load
599and
600.Fn atomic_store
601were added in
602.Fx 12.0 .
603