/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * Implements (almost always) lock-free atomic operations. The operations here
 * are a subset of those found in C++11's <atomic> header, with a different
 * API to enforce consistent memory ordering constraints.
 *
 * Anyone caught using |volatile| for inter-thread memory safety needs to be
 * sent a copy of this header and the C++11 standard.
 */

#ifndef mozilla_Atomics_h
#define mozilla_Atomics_h

#include "mozilla/Assertions.h"
#include "mozilla/Attributes.h"
#include "mozilla/Compiler.h"
#include "mozilla/TypeTraits.h"

#include <atomic>

#include <stddef.h>  // for ptrdiff_t
#include <stdint.h>

namespace mozilla {

/**
 * An enum of memory ordering possibilities for atomics.
 *
 * Memory ordering is the observable state of distinct values in memory.
 * (It's a separate concept from atomicity, which concerns whether an
 * operation can ever be observed in an intermediate state.  Don't
 * conflate the two!)  Given a sequence of operations in source code on
 * memory, it is *not* always the case that, at all times and on all
 * cores, those operations will appear to have occurred in that exact
 * sequence.  First, the compiler might reorder that sequence, if it
 * thinks another ordering will be more efficient.  Second, the CPU may
 * not expose so consistent a view of memory.  CPUs will often perform
 * their own instruction reordering, above and beyond that performed by
 * the compiler.  And each core has its own memory caches, and accesses
 * (reads and writes both) to "memory" may only resolve to out-of-date
 * cache entries -- not to the "most recently" performed operation in
 * some global sense.  Any access to a value that may be used by
 * multiple threads, potentially across multiple cores, must therefore
 * have a memory ordering imposed on it, for all code on all
 * threads/cores to have a sufficiently coherent worldview.
 *
 * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
 * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
 * detail on all this, including examples of how each mode works.
 *
 * Note that for simplicity and practicality, not all of the modes in
 * C++11 are supported.  The missing C++11 modes are either subsumed by
 * the modes we provide below, or not relevant for the CPUs we support
 * in Gecko.  These three modes are confusing enough as it is!
 */
enum MemoryOrdering {
  /*
   * Relaxed ordering is the simplest memory ordering: none at all.
   * When the result of a write is observed, nothing may be inferred
   * about other memory.  Writes ostensibly performed "before" on the
   * writing thread may not yet be visible.  Writes performed "after" on
   * the writing thread may already be visible, if the compiler or CPU
   * reordered them.  (The latter can happen if reads and/or writes get
   * held up in per-processor caches.)  Relaxed ordering means
   * operations can always use cached values (as long as the actual
   * updates to atomic values actually occur, correctly, eventually), so
   * it's usually the fastest sort of atomic access.  For this reason,
   * *it's also the most dangerous kind of access*.
   *
   * Relaxed ordering is good for things like process-wide statistics
   * counters that don't need to be consistent with anything else, so
   * long as updates themselves are atomic.  (And so long as any
   * observations of that value can tolerate being out-of-date -- if you
   * need some sort of up-to-date value, you need some sort of other
   * synchronizing operation.)  It's *not* good for locks, mutexes,
   * reference counts, etc. that mediate access to other memory, or must
   * be observably consistent with other memory.
   *
   * x86 architectures don't take advantage of the optimization
   * opportunities that relaxed ordering permits.  Thus it's possible
   * that using relaxed ordering will "work" on x86 but fail elsewhere
   * (ARM, say, which *does* implement non-sequentially-consistent
   * relaxed ordering semantics).  Be extra-careful using relaxed
   * ordering if you can't easily test non-x86 architectures!
   */
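  /*
   * Example (an illustrative sketch, not part of the original header): a
   * process-wide event counter of the sort described above.  Updates are
   * atomic, but readers tolerate stale values, so Relaxed suffices:
   *
   *   static Atomic<uint64_t, Relaxed> sEventCount(0);
   *
   *   void NoteEvent() { sEventCount++; }
   *   uint64_t ApproximateEventCount() { return sEventCount; }  // may be stale
   */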
  Relaxed,

  /*
   * When an atomic value is updated with ReleaseAcquire ordering, and
   * that new value is observed with ReleaseAcquire ordering, prior
   * writes (atomic or not) are also observable.  What ReleaseAcquire
   * *doesn't* give you is any observable ordering guarantees for
   * ReleaseAcquire-ordered operations on different objects.  For
   * example, if there are two cores that each perform ReleaseAcquire
   * operations on separate objects, each core may or may not observe
   * the operations made by the other core.  The only way the cores can
   * be synchronized with ReleaseAcquire is if they both
   * ReleaseAcquire-access the same object.  This implies that you can't
   * necessarily describe some global total ordering of ReleaseAcquire
   * operations.
   *
   * ReleaseAcquire ordering is good for (as the name implies) atomic
   * operations on values controlling ownership of things: reference
   * counts, mutexes, and the like.  However, if you are thinking about
   * using these to implement your own locks or mutexes, you should take
   * a good, hard look at actual lock or mutex primitives first.
   */
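  /*
   * Example (an illustrative sketch, not part of the original header):
   * publishing data through a ReleaseAcquire flag.  Any reader that
   * observes |sReady| as true is guaranteed to also observe the writer's
   * earlier store to |sData|:
   *
   *   static int sData;  // plain, non-atomic memory
   *   static Atomic<bool, ReleaseAcquire> sReady(false);
   *
   *   void Writer() { sData = 42; sReady = true; }  // release store
   *   void Reader() {
   *     if (sReady) {                               // acquire load
   *       int value = sData;  // guaranteed to see 42
   *     }
   *   }
   */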
  ReleaseAcquire,

  /*
   * When an atomic value is updated with SequentiallyConsistent
   * ordering, all prior writes are observable when the update is
   * observed, just as with ReleaseAcquire ordering.  But, furthermore,
   * a global total ordering of SequentiallyConsistent operations *can*
   * be described.
   * For example, if two cores perform SequentiallyConsistent operations
   * on separate objects, one core will observably perform its update
   * (and all previous operations will have completed), then the other
   * core will observably perform its update (and all previous
   * operations will have completed).  (Although those previous
   * operations aren't themselves ordered -- they could be intermixed,
   * or ordered if they occur on atomic values with ordering
   * requirements.)  SequentiallyConsistent is the *simplest and safest*
   * ordering of atomic operations -- it's always as if one operation
   * happens, then another, then another, in some order -- and every
   * core observes updates to happen in that single order.  Because it
   * has the most synchronization requirements, operations ordered this
   * way also tend to be slowest.
   *
   * SequentiallyConsistent ordering can be desirable when multiple
   * threads observe objects, and they all have to agree on the
   * observable order of changes to them.  People expect
   * SequentiallyConsistent ordering, even if they shouldn't, when
   * writing code, atomic or otherwise.  SequentiallyConsistent is also
   * the ordering of choice when designing lockless data structures.  If
   * you don't know what order to use, use this one.
   */
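  /*
   * Example (an illustrative sketch, not part of the original header):
   * the classic two-flag handshake.  With SequentiallyConsistent
   * ordering (the default), at least one thread must observe the
   * other's store; with weaker orderings, both |r1| and |r2| could end
   * up false:
   *
   *   static Atomic<bool> sX(false);
   *   static Atomic<bool> sY(false);
   *
   *   void Thread1() { sX = true; bool r1 = sY; }
   *   void Thread2() { sY = true; bool r2 = sX; }
   */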
  SequentiallyConsistent,
};

namespace detail {

/*
 * We provide CompareExchangeFailureOrder to work around a bug in some
 * versions of GCC's <atomic> header.  See bug 898491.
 */
template <MemoryOrdering Order>
struct AtomicOrderConstraints;

template <>
struct AtomicOrderConstraints<Relaxed> {
  static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
  static const std::memory_order LoadOrder = std::memory_order_relaxed;
  static const std::memory_order StoreOrder = std::memory_order_relaxed;
  static const std::memory_order CompareExchangeFailureOrder =
      std::memory_order_relaxed;
};

template <>
struct AtomicOrderConstraints<ReleaseAcquire> {
  static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
  static const std::memory_order LoadOrder = std::memory_order_acquire;
  static const std::memory_order StoreOrder = std::memory_order_release;
  static const std::memory_order CompareExchangeFailureOrder =
      std::memory_order_acquire;
};

template <>
struct AtomicOrderConstraints<SequentiallyConsistent> {
  static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
  static const std::memory_order LoadOrder = std::memory_order_seq_cst;
  static const std::memory_order StoreOrder = std::memory_order_seq_cst;
  static const std::memory_order CompareExchangeFailureOrder =
      std::memory_order_seq_cst;
};

template <typename T, MemoryOrdering Order>
struct IntrinsicBase {
  typedef std::atomic<T> ValueType;
  typedef AtomicOrderConstraints<Order> OrderedOp;
};

template <typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps : public IntrinsicBase<T, Order> {
  typedef IntrinsicBase<T, Order> Base;

  static T load(const typename Base::ValueType& aPtr) {
    return aPtr.load(Base::OrderedOp::LoadOrder);
  }

  static void store(typename Base::ValueType& aPtr, T aVal) {
    aPtr.store(aVal, Base::OrderedOp::StoreOrder);
  }

  static T exchange(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.exchange(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static bool compareExchange(typename Base::ValueType& aPtr, T aOldVal,
                              T aNewVal) {
    return aPtr.compare_exchange_strong(
        aOldVal, aNewVal, Base::OrderedOp::AtomicRMWOrder,
        Base::OrderedOp::CompareExchangeFailureOrder);
  }
};

template <typename T, MemoryOrdering Order>
struct IntrinsicAddSub : public IntrinsicBase<T, Order> {
  typedef IntrinsicBase<T, Order> Base;

  static T add(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static T sub(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
  }
};

template <typename T, MemoryOrdering Order>
struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order> {
  typedef IntrinsicBase<T*, Order> Base;

  static T* add(typename Base::ValueType& aPtr, ptrdiff_t aVal) {
    return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static T* sub(typename Base::ValueType& aPtr, ptrdiff_t aVal) {
    return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
  }
};

template <typename T, MemoryOrdering Order>
struct IntrinsicIncDec : public IntrinsicAddSub<T, Order> {
  typedef IntrinsicBase<T, Order> Base;

  static T inc(typename Base::ValueType& aPtr) {
    return IntrinsicAddSub<T, Order>::add(aPtr, 1);
  }

  static T dec(typename Base::ValueType& aPtr) {
    return IntrinsicAddSub<T, Order>::sub(aPtr, 1);
  }
};

template <typename T, MemoryOrdering Order>
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
                          public IntrinsicIncDec<T, Order> {
  typedef IntrinsicBase<T, Order> Base;

  static T or_(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.fetch_or(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static T xor_(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.fetch_xor(aVal, Base::OrderedOp::AtomicRMWOrder);
  }

  static T and_(typename Base::ValueType& aPtr, T aVal) {
    return aPtr.fetch_and(aVal, Base::OrderedOp::AtomicRMWOrder);
  }
};

template <typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
                                     public IntrinsicIncDec<T*, Order> {};

template <typename T>
struct ToStorageTypeArgument {
  static constexpr T convert(T aT) { return aT; }
};

template <typename T, MemoryOrdering Order>
class AtomicBase {
  static_assert(sizeof(T) == 4 || sizeof(T) == 8,
                "mozilla/Atomics.h only supports 32-bit and 64-bit types");

 protected:
  typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
  typedef typename Intrinsics::ValueType ValueType;
  ValueType mValue;

 public:
  constexpr AtomicBase() : mValue() {}
  explicit constexpr AtomicBase(T aInit)
      : mValue(ToStorageTypeArgument<T>::convert(aInit)) {}

  // Note: we can't provide operator T() here because Atomic<bool> inherits
  // from AtomicBase with T=uint32_t and not T=bool. If we implemented
  // operator T() here, it would cause errors when comparing Atomic<bool> with
  // a regular bool.

  T operator=(T aVal) {
    Intrinsics::store(mValue, aVal);
    return aVal;
  }

  /**
   * Performs an atomic swap operation.  aVal is stored and the previous
   * value of this variable is returned.
   */
  T exchange(T aVal) { return Intrinsics::exchange(mValue, aVal); }

  /**
   * Performs an atomic compare-and-swap operation and returns true if it
   * succeeded. This is equivalent to atomically doing
   *
   *   if (mValue == aOldValue) {
   *     mValue = aNewValue;
   *     return true;
   *   } else {
   *     return false;
   *   }
   */
  bool compareExchange(T aOldValue, T aNewValue) {
    return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
  }
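
  // Example (an illustrative sketch, not part of the original header): the
  // usual retry loop for building an arbitrary read-modify-write operation
  // out of compareExchange.  compareExchange doesn't report the value it
  // observed on failure, so the loop re-reads the atomic each iteration.
  // |sValue| and Transform() are hypothetical names:
  //
  //   static Atomic<uint32_t> sValue(0);
  //
  //   void Update() {
  //     uint32_t old;
  //     do {
  //       old = sValue;  // sequentially consistent load
  //     } while (!sValue.compareExchange(old, Transform(old)));
  //   }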

 private:
  template <MemoryOrdering AnyOrder>
  AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) = delete;
};

template <typename T, MemoryOrdering Order>
class AtomicBaseIncDec : public AtomicBase<T, Order> {
  typedef typename detail::AtomicBase<T, Order> Base;

 public:
  constexpr AtomicBaseIncDec() : Base() {}
  explicit constexpr AtomicBaseIncDec(T aInit) : Base(aInit) {}

  using Base::operator=;

  operator T() const { return Base::Intrinsics::load(Base::mValue); }
  T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
  T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
  T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
  T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }

 private:
  template <MemoryOrdering AnyOrder>
  AtomicBaseIncDec(const AtomicBaseIncDec<T, AnyOrder>& aCopy) = delete;
};

}  // namespace detail

/**
 * A wrapper for a type that enforces that all memory accesses are atomic.
 *
 * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
 * its place.  Implementations for integral and pointer types are provided
 * below.
 *
 * Atomic accesses are sequentially consistent by default.  You should
 * use the default unless you are tall enough to ride the
 * memory-ordering roller coaster (if you're not sure, you aren't) and
 * you have a compelling reason to do otherwise.
 *
 * There is one exception to the atomicity of memory accesses: providing an
 * initial value for the atomic variable is not guaranteed to be atomic.  This
 * is a deliberate design choice that enables static atomic variables to be
 * declared without introducing extra static constructors.
 */
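/*
 * Example (an illustrative sketch, not part of the original header): a
 * drop-in replacement for a data-race-prone counter.  All accesses below
 * use the SequentiallyConsistent default:
 *
 *   static Atomic<uint32_t> sLiveObjects(0);
 *
 *   void OnCreate() { sLiveObjects++; }
 *   void OnDestroy() {
 *     MOZ_ASSERT(sLiveObjects > 0);
 *     sLiveObjects--;
 *   }
 */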
template <typename T, MemoryOrdering Order = SequentiallyConsistent,
          typename Enable = void>
class Atomic;

/**
 * Atomic<T> implementation for integral types.
 *
 * In addition to atomic store and load operations, compound assignment and
 * increment/decrement operators are implemented which perform the
 * corresponding read-modify-write operation atomically.  Finally, an atomic
 * swap method is provided.
 */
template <typename T, MemoryOrdering Order>
class Atomic<
    T, Order,
    typename EnableIf<IsIntegral<T>::value && !IsSame<T, bool>::value>::Type>
    : public detail::AtomicBaseIncDec<T, Order> {
  typedef typename detail::AtomicBaseIncDec<T, Order> Base;

 public:
  constexpr Atomic() : Base() {}
  explicit constexpr Atomic(T aInit) : Base(aInit) {}

  using Base::operator=;

  T operator+=(T aDelta) {
    return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
  }

  T operator-=(T aDelta) {
    return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
  }

  T operator|=(T aVal) {
    return Base::Intrinsics::or_(Base::mValue, aVal) | aVal;
  }

  T operator^=(T aVal) {
    return Base::Intrinsics::xor_(Base::mValue, aVal) ^ aVal;
  }

  T operator&=(T aVal) {
    return Base::Intrinsics::and_(Base::mValue, aVal) & aVal;
  }

 private:
  Atomic(Atomic<T, Order>& aOther) = delete;
};
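
/*
 * Example (an illustrative sketch, not part of the original header):
 * atomically setting and clearing bit flags.  Each compound operator
 * returns the new value, computed from the old value that the underlying
 * fetch_or/fetch_and returns:
 *
 *   static Atomic<uint32_t> sFlags(0);
 *
 *   uint32_t SetFlag(uint32_t aBit) { return sFlags |= aBit; }
 *   uint32_t ClearFlag(uint32_t aBit) { return sFlags &= ~aBit; }
 */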

/**
 * Atomic<T> implementation for pointer types.
 *
 * An atomic compare-and-swap primitive for pointer variables is provided, as
 * are atomic increment and decrement operators.  Also provided are the
 * compound assignment operators for addition and subtraction.  Atomic swap
 * (via exchange()) is included as well.
 */
template <typename T, MemoryOrdering Order>
class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order> {
  typedef typename detail::AtomicBaseIncDec<T*, Order> Base;

 public:
  constexpr Atomic() : Base() {}
  explicit constexpr Atomic(T* aInit) : Base(aInit) {}

  using Base::operator=;

  T* operator+=(ptrdiff_t aDelta) {
    return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
  }

  T* operator-=(ptrdiff_t aDelta) {
    return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
  }

 private:
  Atomic(Atomic<T*, Order>& aOther) = delete;
};
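
/*
 * Example (an illustrative sketch, not part of the original header): an
 * atomic bump cursor into a shared buffer.  operator+= advances the
 * pointer atomically and returns the new position; no overflow checking
 * is shown:
 *
 *   static char sBuffer[4096];
 *   static Atomic<char*> sCursor(sBuffer);
 *
 *   char* Reserve(ptrdiff_t aBytes) {
 *     return (sCursor += aBytes) - aBytes;  // start of the reserved region
 *   }
 */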

/**
 * Atomic<T> implementation for enum types.
 *
 * Atomic store and load operations and the atomic swap method are provided.
 */
template <typename T, MemoryOrdering Order>
class Atomic<T, Order, typename EnableIf<IsEnum<T>::value>::Type>
    : public detail::AtomicBase<T, Order> {
  typedef typename detail::AtomicBase<T, Order> Base;

 public:
  constexpr Atomic() : Base() {}
  explicit constexpr Atomic(T aInit) : Base(aInit) {}

  operator T() const { return T(Base::Intrinsics::load(Base::mValue)); }

  using Base::operator=;

 private:
  Atomic(Atomic<T, Order>& aOther) = delete;
};
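
/*
 * Example (an illustrative sketch, not part of the original header):
 * publishing a state machine's current state across threads:
 *
 *   enum State { Idle, Running, ShuttingDown };
 *   static Atomic<State> sState(Idle);
 *
 *   void BeginShutdown() { sState = ShuttingDown; }
 *   bool IsRunning() { return sState == Running; }
 */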

/**
 * Atomic<T> implementation for boolean types.
 *
 * Atomic store and load operations and the atomic swap method are provided.
 *
 * Note:
 *
 * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
 *   bool and/or some implementations of std::atomic. This is allowed in
 *   [atomic.types.generic]p9.
 *
 * - It's not obvious whether the 8-bit atomic functions on Windows are always
 *   inlined or not. If they are not inlined, the corresponding functions in the
 *   runtime library are not available on Windows XP. This is why we implement
 *   Atomic<bool> with an underlying type of uint32_t.
 */
template <MemoryOrdering Order>
class Atomic<bool, Order> : protected detail::AtomicBase<uint32_t, Order> {
  typedef typename detail::AtomicBase<uint32_t, Order> Base;

 public:
  constexpr Atomic() : Base() {}
  explicit constexpr Atomic(bool aInit) : Base(aInit) {}

  // We provide boolean wrappers for the underlying AtomicBase methods.
  MOZ_IMPLICIT operator bool() const {
    return Base::Intrinsics::load(Base::mValue);
  }

  bool operator=(bool aVal) { return Base::operator=(aVal); }

  bool exchange(bool aVal) { return Base::exchange(aVal); }

  bool compareExchange(bool aOldValue, bool aNewValue) {
    return Base::compareExchange(aOldValue, aNewValue);
  }

 private:
  Atomic(Atomic<bool, Order>& aOther) = delete;
};
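
/*
 * Example (an illustrative sketch, not part of the original header): a
 * run-once guard.  compareExchange guarantees exactly one thread wins
 * the race; note that losing threads must not assume the winner's
 * Initialize() (hypothetical) has already completed:
 *
 *   static Atomic<bool> sInitialized(false);
 *
 *   void EnsureInitializationStarted() {
 *     if (sInitialized.compareExchange(false, true)) {
 *       Initialize();  // one-time setup
 *     }
 *   }
 */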

// If you want to atomically swap two atomic values, use exchange().
template <typename T, MemoryOrdering Order>
void Swap(Atomic<T, Order>&, Atomic<T, Order>&) = delete;
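
/*
 * Example (an illustrative sketch, not part of the original header):
 * exchange() atomically swaps an Atomic with a plain value and returns
 * the previous value.  Swapping *two* Atomics in a single atomic step
 * is not possible, which is why Swap() above is deleted:
 *
 *   Atomic<uint32_t> sA(1);
 *   uint32_t old = sA.exchange(2);  // old == 1, sA == 2
 */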

}  // namespace mozilla

#endif /* mozilla_Atomics_h */