/**
 * The atomic module provides basic support for lock-free
 * concurrent programming.
 *
 * Copyright: Copyright Sean Kelly 2005 - 2016.
 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors:   Sean Kelly, Alex Rønne Petersen
 * Source:    $(DRUNTIMESRC core/_atomic.d)
 */


/* NOTE: This file has been patched from the original DMD distribution to
 * work with the GDC compiler.
 */
module core.atomic;

version (D_InlineAsm_X86)
{
    version = AsmX86;
    version = AsmX86_32;
    enum has64BitCAS = true;
    enum has128BitCAS = false;
}
else version (D_InlineAsm_X86_64)
{
    version = AsmX86;
    version = AsmX86_64;
    enum has64BitCAS = true;
    enum has128BitCAS = true;
}
else version (GNU)
{
    import gcc.config;
    enum has64BitCAS = GNU_Have_64Bit_Atomics;
    enum has128BitCAS = GNU_Have_LibAtomic;
}
else
{
    enum has64BitCAS = false;
    enum has128BitCAS = false;
}

private
{
    template HeadUnshared(T)
    {
        static if ( is( T U : shared(U*) ) )
            alias shared(U)* HeadUnshared;
        else
            alias T HeadUnshared;
    }
}
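
// For example, HeadUnshared!(shared(int*)) yields shared(int)*: the head
// pointer itself loses its shared qualifier while the data it points to
// remains shared.  For non-pointer types, HeadUnshared!(T) is simply T.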


version (AsmX86)
{
    // NOTE: Strictly speaking, the x86 supports atomic operations on
    //       unaligned values.  However, this is far slower than the
    //       common case, so such behavior should be prohibited.
    private bool atomicValueIsProperlyAligned(T)( ref T val ) pure nothrow @nogc @trusted
    {
        return atomicPtrIsProperlyAligned(&val);
    }

    private bool atomicPtrIsProperlyAligned(T)( T* ptr ) pure nothrow @nogc @safe
    {
        // NOTE: 32 bit x86 systems support 8 byte CAS, which only requires
        //       4 byte alignment, so use size_t as the align type here.
        static if ( T.sizeof > size_t.sizeof )
            return cast(size_t)ptr % size_t.sizeof == 0;
        else
            return cast(size_t)ptr % T.sizeof == 0;
    }
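
    // For example, on 32-bit x86 an 8-byte shared long only needs 4-byte
    // (size_t) alignment to be usable with CMPXCHG8B, so an address such as
    // 0x1004 passes the check above even though it is not 8-byte aligned.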
}


version (CoreDdoc)
{
    /**
     * Performs the binary operation 'op' on val using 'mod' as the modifier.
     *
     * Params:
     *  val = The target variable.
     *  mod = The modifier to apply.
     *
     * Returns:
     *  The result of the operation.
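     *
     * Example:
     * A minimal sketch of an atomic increment (illustrative only):
     * ---
     * shared int count;
     * atomicOp!"+="( count, 1 );  // atomically increments count
     * ---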
     */
    HeadUnshared!(T) atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc @safe
        if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) )
    {
        return HeadUnshared!(T).init;
    }


    /**
     * Stores 'writeThis' to the memory referenced by 'here' if the value
     * referenced by 'here' is equal to 'ifThis'.  This operation is both
     * lock-free and atomic.
     *
     * Params:
     *  here      = The address of the destination variable.
     *  writeThis = The value to store.
     *  ifThis    = The comparison value.
     *
     * Returns:
     *  true if the store occurred, false if not.
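     *
     * Example:
     * A typical lock-free update loop built on cas (illustrative sketch):
     * ---
     * shared int val;
     * int get, set;
     * do
     * {
     *     get = atomicLoad!(MemoryOrder.raw)( val );
     *     set = get + 1;
     * } while ( !cas( &val, get, set ) );
     * ---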
     */
    bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe
        if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) );

    /// Ditto
    bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe
        if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) );

    /// Ditto
    bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe
        if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) );

    /**
     * Loads 'val' from memory and returns it.  The memory barrier specified
     * by 'ms' is applied to the operation, which is fully sequenced by
     * default.  Valid memory orders are MemoryOrder.raw, MemoryOrder.acq,
     * and MemoryOrder.seq.
     *
     * Params:
     *  val = The target variable.
     *
     * Returns:
     *  The value of 'val'.
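     *
     * Example:
     * An acquire load pairing with a release store (illustrative sketch):
     * ---
     * shared bool ready;
     * if ( atomicLoad!(MemoryOrder.acq)( ready ) )
     * {
     *     // Everything stored before the matching release store to
     *     // 'ready' is now visible to this thread.
     * }
     * ---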
     */
    HeadUnshared!(T) atomicLoad(MemoryOrder ms = MemoryOrder.seq,T)( ref const shared T val ) pure nothrow @nogc @safe
    {
        return HeadUnshared!(T).init;
    }


    /**
     * Writes 'newval' into 'val'.  The memory barrier specified by 'ms' is
     * applied to the operation, which is fully sequenced by default.
     * Valid memory orders are MemoryOrder.raw, MemoryOrder.rel, and
     * MemoryOrder.seq.
     *
     * Params:
     *  val    = The target variable.
     *  newval = The value to store.
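     *
     * Example:
     * A release store publishing data to other threads (illustrative sketch):
     * ---
     * shared bool ready;
     * // ... initialize the shared data ...
     * atomicStore!(MemoryOrder.rel)( ready, true );
     * ---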
     */
    void atomicStore(MemoryOrder ms = MemoryOrder.seq,T,V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe
        if ( __traits( compiles, { val = newval; } ) )
    {

    }


    /**
     * Specifies the memory ordering semantics of an atomic operation.
     */
    enum MemoryOrder
    {
        raw,    /// Not sequenced.
        acq,    /// Hoist-load + hoist-store barrier.
        rel,    /// Sink-load + sink-store barrier.
        seq,    /// Fully sequenced (acquire + release).
    }

    deprecated("Please use MemoryOrder instead.")
    alias MemoryOrder msync;

    /**
     * Inserts a full load/store memory fence (on platforms that need it). This ensures
     * that all loads and stores before a call to this function are executed before any
     * loads and stores after the call.
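     *
     * Example:
     * An illustrative sketch of ordering two unrelated accesses:
     * ---
     * shared int a, b;
     * atomicStore!(MemoryOrder.raw)( a, 1 );
     * atomicFence();  // the store to a completes before the load of b
     * int r = atomicLoad!(MemoryOrder.raw)( b );
     * ---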
     */
    void atomicFence() nothrow @nogc;
}
else version (AsmX86_32)
{
    // Uses specialized asm for fast fetch and add operations
    private HeadUnshared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc @safe
        if ( T.sizeof <= 4 )
    {
        size_t tmp = mod;
        asm pure nothrow @nogc @trusted
        {
            mov EAX, tmp;
            mov EDX, val;
        }
        static if (T.sizeof == 1) asm pure nothrow @nogc @trusted { lock; xadd[EDX], AL; }
        else static if (T.sizeof == 2) asm pure nothrow @nogc @trusted { lock; xadd[EDX], AX; }
        else static if (T.sizeof == 4) asm pure nothrow @nogc @trusted { lock; xadd[EDX], EAX; }

        asm pure nothrow @nogc @trusted
        {
            mov tmp, EAX;
        }

        return cast(T)tmp;
    }

    private HeadUnshared!(T) atomicFetchSub(T)( ref shared T val, size_t mod ) pure nothrow @nogc @safe
        if ( T.sizeof <= 4 )
    {
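        // Negating the unsigned 'mod' relies on two's complement wraparound:
        // XADD with the wrapped value performs an atomic subtraction.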
        return atomicFetchAdd(val, -mod);
    }

    HeadUnshared!(T) atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc
        if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) )
    in
    {
        assert(atomicValueIsProperlyAligned(val));
    }
    body
    {
        // binary operators
        //
        // +    -   *   /   %   ^^  &
        // |    ^   <<  >>  >>> ~   in
        // ==   !=  <   <=  >   >=
        static if ( op == "+"  || op == "-"  || op == "*"  || op == "/"   ||
                    op == "%"  || op == "^^" || op == "&"  || op == "|"   ||
                    op == "^"  || op == "<<" || op == ">>" || op == ">>>" ||
                    op == "~"  || // skip "in"
                    op == "==" || op == "!=" || op == "<"  || op == "<="  ||
                    op == ">"  || op == ">=" )
        {
            HeadUnshared!(T) get = atomicLoad!(MemoryOrder.raw)( val );
            mixin( "return get " ~ op ~ " mod;" );
        }
        else
        // assignment operators
        //
        // +=   -=  *=  /=  %=  ^^= &=
        // |=   ^=  <<= >>= >>>=    ~=
        static if ( op == "+=" && __traits(isIntegral, T) && T.sizeof <= 4 && V1.sizeof <= 4 )
        {
            return cast(T)(atomicFetchAdd!(T)(val, mod) + mod);
        }
        else static if ( op == "-=" && __traits(isIntegral, T) && T.sizeof <= 4 && V1.sizeof <= 4 )
        {
            return cast(T)(atomicFetchSub!(T)(val, mod) - mod);
        }
        else static if ( op == "+=" || op == "-="  || op == "*="  || op == "/=" ||
                    op == "%=" || op == "^^=" || op == "&="  || op == "|=" ||
                    op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~="
        {
            HeadUnshared!(T) get, set;

            do
            {
                get = set = atomicLoad!(MemoryOrder.raw)( val );
                mixin( "set " ~ op ~ " mod;" );
            } while ( !casByRef( val, get, set ) );
            return set;
        }
        else
        {
            static assert( false, "Operation not supported." );
        }
    }

    bool casByRef(T,V1,V2)( ref T value, V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted
    {
        return cas(&value, ifThis, writeThis);
    }

    bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe
        if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) )
    {
        return casImpl(here, ifThis, writeThis);
    }

    bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe
        if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) )
    {
        return casImpl(here, ifThis, writeThis);
    }

    bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe
        if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) )
    {
        return casImpl(here, ifThis, writeThis);
    }

    private bool casImpl(T,V1,V2)( shared(T)* here, V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe
    in
    {
        assert( atomicPtrIsProperlyAligned( here ) );
    }
    body
    {
        static if ( T.sizeof == byte.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 1 Byte CAS
            //////////////////////////////////////////////////////////////////

            asm pure nothrow @nogc @trusted
            {
                mov DL, writeThis;
                mov AL, ifThis;
                mov ECX, here;
                lock; // lock always needed to make this op atomic
                cmpxchg [ECX], DL;
                setz AL;
            }
        }
        else static if ( T.sizeof == short.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 2 Byte CAS
            //////////////////////////////////////////////////////////////////

            asm pure nothrow @nogc @trusted
            {
                mov DX, writeThis;
                mov AX, ifThis;
                mov ECX, here;
                lock; // lock always needed to make this op atomic
                cmpxchg [ECX], DX;
                setz AL;
            }
        }
        else static if ( T.sizeof == int.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 4 Byte CAS
            //////////////////////////////////////////////////////////////////

            asm pure nothrow @nogc @trusted
            {
                mov EDX, writeThis;
                mov EAX, ifThis;
                mov ECX, here;
                lock; // lock always needed to make this op atomic
                cmpxchg [ECX], EDX;
                setz AL;
            }
        }
        else static if ( T.sizeof == long.sizeof && has64BitCAS )
        {
            //////////////////////////////////////////////////////////////////
            // 8 Byte CAS on a 32-Bit Processor
            //////////////////////////////////////////////////////////////////

            asm pure nothrow @nogc @trusted
            {
                push EDI;
                push EBX;
                lea EDI, writeThis;
                mov EBX, [EDI];
                mov ECX, 4[EDI];
                lea EDI, ifThis;
                mov EAX, [EDI];
                mov EDX, 4[EDI];
                mov EDI, here;
                lock; // lock always needed to make this op atomic
                cmpxchg8b [EDI];
                setz AL;
                pop EBX;
                pop EDI;
            }
        }
        else
        {
            static assert( false, "Invalid template type specified." );
        }
    }


    enum MemoryOrder
    {
        raw,
        acq,
        rel,
        seq,
    }

    deprecated("Please use MemoryOrder instead.")
    alias MemoryOrder msync;


    private
    {
        // NOTE: x86 loads implicitly have acquire semantics so a memory
        //       barrier is only necessary on releases.
        template needsLoadBarrier( MemoryOrder ms )
        {
            enum bool needsLoadBarrier = ms == MemoryOrder.seq;
        }


        // NOTE: x86 stores implicitly have release semantics so a memory
        //       barrier is only necessary on acquires.
        template needsStoreBarrier( MemoryOrder ms )
        {
            enum bool needsStoreBarrier = ms == MemoryOrder.seq;
        }
    }


    HeadUnshared!(T) atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @safe
    if (!__traits(isFloating, T))
    {
        static assert( ms != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()" );
        static assert( __traits(isPOD, T), "argument to atomicLoad() must be POD" );

        static if ( T.sizeof == byte.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 1 Byte Load
            //////////////////////////////////////////////////////////////////

            static if ( needsLoadBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov DL, 0;
                    mov AL, 0;
                    mov ECX, val;
                    lock; // lock always needed to make this op atomic
                    cmpxchg [ECX], DL;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov EAX, val;
                    mov AL, [EAX];
                }
            }
        }
        else static if ( T.sizeof == short.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 2 Byte Load
            //////////////////////////////////////////////////////////////////

            static if ( needsLoadBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov DX, 0;
                    mov AX, 0;
                    mov ECX, val;
                    lock; // lock always needed to make this op atomic
                    cmpxchg [ECX], DX;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov EAX, val;
                    mov AX, [EAX];
                }
            }
        }
        else static if ( T.sizeof == int.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 4 Byte Load
            //////////////////////////////////////////////////////////////////

            static if ( needsLoadBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov EDX, 0;
                    mov EAX, 0;
                    mov ECX, val;
                    lock; // lock always needed to make this op atomic
                    cmpxchg [ECX], EDX;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov EAX, val;
                    mov EAX, [EAX];
                }
            }
        }
        else static if ( T.sizeof == long.sizeof && has64BitCAS )
        {
            //////////////////////////////////////////////////////////////////
            // 8 Byte Load on a 32-Bit Processor
            //////////////////////////////////////////////////////////////////

            asm pure nothrow @nogc @trusted
            {
                push EDI;
                push EBX;
                mov EBX, 0;
                mov ECX, 0;
                mov EAX, 0;
                mov EDX, 0;
                mov EDI, val;
                lock; // lock always needed to make this op atomic
                cmpxchg8b [EDI];
                pop EBX;
                pop EDI;
            }
        }
        else
        {
            static assert( false, "Invalid template type specified." );
        }
    }

    void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe
        if ( __traits( compiles, { val = newval; } ) )
    {
        static assert( ms != MemoryOrder.acq, "invalid MemoryOrder for atomicStore()" );
        static assert( __traits(isPOD, T), "argument to atomicStore() must be POD" );

        static if ( T.sizeof == byte.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 1 Byte Store
            //////////////////////////////////////////////////////////////////

            static if ( needsStoreBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov EAX, val;
                    mov DL, newval;
                    lock;
                    xchg [EAX], DL;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov EAX, val;
                    mov DL, newval;
                    mov [EAX], DL;
                }
            }
        }
        else static if ( T.sizeof == short.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 2 Byte Store
            //////////////////////////////////////////////////////////////////

            static if ( needsStoreBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov EAX, val;
                    mov DX, newval;
                    lock;
                    xchg [EAX], DX;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov EAX, val;
                    mov DX, newval;
                    mov [EAX], DX;
                }
            }
        }
        else static if ( T.sizeof == int.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 4 Byte Store
            //////////////////////////////////////////////////////////////////

            static if ( needsStoreBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov EAX, val;
                    mov EDX, newval;
                    lock;
                    xchg [EAX], EDX;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov EAX, val;
                    mov EDX, newval;
                    mov [EAX], EDX;
                }
            }
        }
        else static if ( T.sizeof == long.sizeof && has64BitCAS )
        {
            //////////////////////////////////////////////////////////////////
            // 8 Byte Store on a 32-Bit Processor
            //////////////////////////////////////////////////////////////////

            asm pure nothrow @nogc @trusted
            {
                push EDI;
                push EBX;
                lea EDI, newval;
                mov EBX, [EDI];
                mov ECX, 4[EDI];
                mov EDI, val;
                mov EAX, [EDI];
                mov EDX, 4[EDI];
            L1: lock; // lock always needed to make this op atomic
                cmpxchg8b [EDI];
                jne L1;
                pop EBX;
                pop EDI;
            }
        }
        else
        {
            static assert( false, "Invalid template type specified." );
        }
    }


    void atomicFence() nothrow @nogc @safe
    {
        import core.cpuid;

        asm pure nothrow @nogc @trusted
        {
            naked;

            call sse2;
            test AL, AL;
            je Lcpuid;

            // Fast path: We have SSE2, so just use mfence.
            mfence;
            jmp Lend;

        Lcpuid:

            // Slow path: We use cpuid to serialize. This is
            // significantly slower than mfence, but is the
            // only serialization facility we have available
            // on older non-SSE2 chips.
            push EBX;

            mov EAX, 0;
            cpuid;

            pop EBX;

        Lend:

            ret;
        }
    }
}
else version (AsmX86_64)
{
    // Uses specialized asm for fast fetch and add operations
    private HeadUnshared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted
        if ( __traits(isIntegral, T) )
    in
    {
        assert( atomicValueIsProperlyAligned(val));
    }
    body
    {
        size_t tmp = mod;
        asm pure nothrow @nogc @trusted
        {
            mov RAX, tmp;
            mov RDX, val;
        }
        static if (T.sizeof == 1) asm pure nothrow @nogc @trusted { lock; xadd[RDX], AL; }
        else static if (T.sizeof == 2) asm pure nothrow @nogc @trusted { lock; xadd[RDX], AX; }
        else static if (T.sizeof == 4) asm pure nothrow @nogc @trusted { lock; xadd[RDX], EAX; }
        else static if (T.sizeof == 8) asm pure nothrow @nogc @trusted { lock; xadd[RDX], RAX; }

        asm pure nothrow @nogc @trusted
        {
            mov tmp, RAX;
        }

        return cast(T)tmp;
    }

    private HeadUnshared!(T) atomicFetchSub(T)( ref shared T val, size_t mod ) pure nothrow @nogc @safe
        if ( __traits(isIntegral, T) )
    {
        return atomicFetchAdd(val, -mod);
    }

    HeadUnshared!(T) atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc
        if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) )
    in
    {
        assert( atomicValueIsProperlyAligned(val));
    }
    body
    {
        // binary operators
        //
        // +    -   *   /   %   ^^  &
        // |    ^   <<  >>  >>> ~   in
        // ==   !=  <   <=  >   >=
        static if ( op == "+"  || op == "-"  || op == "*"  || op == "/"   ||
                    op == "%"  || op == "^^" || op == "&"  || op == "|"   ||
                    op == "^"  || op == "<<" || op == ">>" || op == ">>>" ||
                    op == "~"  || // skip "in"
                    op == "==" || op == "!=" || op == "<"  || op == "<="  ||
                    op == ">"  || op == ">=" )
        {
            HeadUnshared!(T) get = atomicLoad!(MemoryOrder.raw)( val );
            mixin( "return get " ~ op ~ " mod;" );
        }
        else
        // assignment operators
        //
        // +=   -=  *=  /=  %=  ^^= &=
        // |=   ^=  <<= >>= >>>=    ~=
        static if ( op == "+=" && __traits(isIntegral, T) && __traits(isIntegral, V1) )
        {
            return cast(T)(atomicFetchAdd!(T)(val, mod) + mod);
        }
        else static if ( op == "-=" && __traits(isIntegral, T) && __traits(isIntegral, V1) )
        {
            return cast(T)(atomicFetchSub!(T)(val, mod) - mod);
        }
        else static if ( op == "+=" || op == "-="  || op == "*="  || op == "/=" ||
                    op == "%=" || op == "^^=" || op == "&="  || op == "|=" ||
                    op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~="
        {
            HeadUnshared!(T) get, set;

            do
            {
                get = set = atomicLoad!(MemoryOrder.raw)( val );
                mixin( "set " ~ op ~ " mod;" );
            } while ( !casByRef( val, get, set ) );
            return set;
        }
        else
        {
            static assert( false, "Operation not supported." );
        }
    }


    bool casByRef(T,V1,V2)( ref T value, V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted
    {
        return cas(&value, ifThis, writeThis);
    }

    bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe
        if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) )
    {
        return casImpl(here, ifThis, writeThis);
    }

    bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe
        if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) )
    {
        return casImpl(here, ifThis, writeThis);
    }

    bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe
        if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) )
    {
        return casImpl(here, ifThis, writeThis);
    }

    private bool casImpl(T,V1,V2)( shared(T)* here, V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe
    in
    {
        assert( atomicPtrIsProperlyAligned( here ) );
    }
    body
    {
        static if ( T.sizeof == byte.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 1 Byte CAS
            //////////////////////////////////////////////////////////////////

            asm pure nothrow @nogc @trusted
            {
                mov DL, writeThis;
                mov AL, ifThis;
                mov RCX, here;
                lock; // lock always needed to make this op atomic
                cmpxchg [RCX], DL;
                setz AL;
            }
        }
        else static if ( T.sizeof == short.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 2 Byte CAS
            //////////////////////////////////////////////////////////////////

            asm pure nothrow @nogc @trusted
            {
                mov DX, writeThis;
                mov AX, ifThis;
                mov RCX, here;
                lock; // lock always needed to make this op atomic
                cmpxchg [RCX], DX;
                setz AL;
            }
        }
        else static if ( T.sizeof == int.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 4 Byte CAS
            //////////////////////////////////////////////////////////////////

            asm pure nothrow @nogc @trusted
            {
                mov EDX, writeThis;
                mov EAX, ifThis;
                mov RCX, here;
                lock; // lock always needed to make this op atomic
                cmpxchg [RCX], EDX;
                setz AL;
            }
        }
        else static if ( T.sizeof == long.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 8 Byte CAS on a 64-Bit Processor
            //////////////////////////////////////////////////////////////////

            asm pure nothrow @nogc @trusted
            {
                mov RDX, writeThis;
                mov RAX, ifThis;
                mov RCX, here;
                lock; // lock always needed to make this op atomic
                cmpxchg [RCX], RDX;
                setz AL;
            }
        }
        else static if ( T.sizeof == long.sizeof*2 && has128BitCAS )
        {
            //////////////////////////////////////////////////////////////////
            // 16 Byte CAS on a 64-Bit Processor
            //////////////////////////////////////////////////////////////////
            version (Win64)
            {
                // Windows 64 calling convention uses different registers.
                // DMD appears to reverse the register order.
                asm pure nothrow @nogc @trusted
                {
                    push RDI;
                    push RBX;
                    mov R9, writeThis;
                    mov R10, ifThis;
                    mov R11, here;

                    mov RDI, R9;
                    mov RBX, [RDI];
                    mov RCX, 8[RDI];

                    mov RDI, R10;
                    mov RAX, [RDI];
                    mov RDX, 8[RDI];

                    mov RDI, R11;
                    lock;
                    cmpxchg16b [RDI];
                    setz AL;
                    pop RBX;
                    pop RDI;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    push RDI;
                    push RBX;
                    lea RDI, writeThis;
                    mov RBX, [RDI];
                    mov RCX, 8[RDI];
                    lea RDI, ifThis;
                    mov RAX, [RDI];
                    mov RDX, 8[RDI];
                    mov RDI, here;
                    lock; // lock always needed to make this op atomic
                    cmpxchg16b [RDI];
                    setz AL;
                    pop RBX;
                    pop RDI;
                }
            }
        }
        else
        {
            static assert( false, "Invalid template type specified." );
        }
    }


    enum MemoryOrder
    {
        raw,
        acq,
        rel,
        seq,
    }

    deprecated("Please use MemoryOrder instead.")
    alias MemoryOrder msync;


    private
    {
        // NOTE: x86 loads implicitly have acquire semantics so a memory
        //       barrier is only necessary on releases.
        template needsLoadBarrier( MemoryOrder ms )
        {
            enum bool needsLoadBarrier = ms == MemoryOrder.seq;
        }


        // NOTE: x86 stores implicitly have release semantics so a memory
        //       barrier is only necessary on acquires.
        template needsStoreBarrier( MemoryOrder ms )
        {
            enum bool needsStoreBarrier = ms == MemoryOrder.seq;
        }
    }


    HeadUnshared!(T) atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @safe
    if (!__traits(isFloating, T))
    {
        static assert( ms != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()" );
        static assert( __traits(isPOD, T), "argument to atomicLoad() must be POD" );

        static if ( T.sizeof == byte.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 1 Byte Load
            //////////////////////////////////////////////////////////////////

            static if ( needsLoadBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov DL, 0;
                    mov AL, 0;
                    mov RCX, val;
                    lock; // lock always needed to make this op atomic
                    cmpxchg [RCX], DL;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov AL, [RAX];
                }
            }
        }
        else static if ( T.sizeof == short.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 2 Byte Load
            //////////////////////////////////////////////////////////////////

            static if ( needsLoadBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov DX, 0;
                    mov AX, 0;
                    mov RCX, val;
                    lock; // lock always needed to make this op atomic
                    cmpxchg [RCX], DX;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov AX, [RAX];
                }
            }
        }
        else static if ( T.sizeof == int.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 4 Byte Load
            //////////////////////////////////////////////////////////////////

            static if ( needsLoadBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov EDX, 0;
                    mov EAX, 0;
                    mov RCX, val;
                    lock; // lock always needed to make this op atomic
                    cmpxchg [RCX], EDX;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov EAX, [RAX];
                }
            }
        }
        else static if ( T.sizeof == long.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 8 Byte Load
            //////////////////////////////////////////////////////////////////

            static if ( needsLoadBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RDX, 0;
                    mov RAX, 0;
                    mov RCX, val;
                    lock; // lock always needed to make this op atomic
                    cmpxchg [RCX], RDX;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov RAX, [RAX];
                }
            }
        }
        else static if ( T.sizeof == long.sizeof*2 && has128BitCAS )
        {
            //////////////////////////////////////////////////////////////////
            // 16 Byte Load on a 64-Bit Processor
            //////////////////////////////////////////////////////////////////
            version (Win64)
            {
                size_t[2] retVal;
                asm pure nothrow @nogc @trusted
                {
                    push RDI;
                    push RBX;
                    mov RDI, val;
                    mov RBX, 0;
                    mov RCX, 0;
                    mov RAX, 0;
                    mov RDX, 0;
                    lock; // lock always needed to make this op atomic
                    cmpxchg16b [RDI];
                    lea RDI, retVal;
                    mov [RDI], RAX;
                    mov 8[RDI], RDX;
                    pop RBX;
                    pop RDI;
                }

                static if (is(T:U[], U))
                {
                    pragma(inline, true)
                    static typeof(return) toTrusted(size_t[2] retVal) @trusted
                    {
                        return *(cast(typeof(return)*) retVal.ptr);
                    }

                    return toTrusted(retVal);
                }
                else
                {
                    return cast(typeof(return)) retVal;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    push RDI;
                    push RBX;
                    mov RBX, 0;
                    mov RCX, 0;
                    mov RAX, 0;
                    mov RDX, 0;
                    mov RDI, val;
                    lock; // lock always needed to make this op atomic
                    cmpxchg16b [RDI];
                    pop RBX;
                    pop RDI;
                }
            }
        }
        else
        {
            static assert( false, "Invalid template type specified." );
        }
    }


    void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe
        if ( __traits( compiles, { val = newval; } ) )
    {
        static assert( ms != MemoryOrder.acq, "invalid MemoryOrder for atomicStore()" );
        static assert( __traits(isPOD, T), "argument to atomicStore() must be POD" );

        static if ( T.sizeof == byte.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 1 Byte Store
            //////////////////////////////////////////////////////////////////

            static if ( needsStoreBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov DL, newval;
                    lock;
                    xchg [RAX], DL;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov DL, newval;
                    mov [RAX], DL;
                }
            }
        }
        else static if ( T.sizeof == short.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 2 Byte Store
            //////////////////////////////////////////////////////////////////

            static if ( needsStoreBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov DX, newval;
                    lock;
                    xchg [RAX], DX;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov DX, newval;
                    mov [RAX], DX;
                }
            }
        }
        else static if ( T.sizeof == int.sizeof )
        {
            //////////////////////////////////////////////////////////////////
            // 4 Byte Store
            //////////////////////////////////////////////////////////////////

            static if ( needsStoreBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov EDX, newval;
                    lock;
                    xchg [RAX], EDX;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov EDX, newval;
                    mov [RAX], EDX;
                }
            }
        }
        else static if ( T.sizeof == long.sizeof && has64BitCAS )
        {
            //////////////////////////////////////////////////////////////////
            // 8 Byte Store on a 64-Bit Processor
            //////////////////////////////////////////////////////////////////

            static if ( needsStoreBarrier!(ms) )
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov RDX, newval;
                    lock;
                    xchg [RAX], RDX;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    mov RAX, val;
                    mov RDX, newval;
                    mov [RAX], RDX;
                }
            }
        }
        else static if ( T.sizeof == long.sizeof*2 && has128BitCAS )
        {
            //////////////////////////////////////////////////////////////////
            // 16 Byte Store on a 64-Bit Processor
            //////////////////////////////////////////////////////////////////
            version (Win64)
            {
                asm pure nothrow @nogc @trusted
                {
                    push RDI;
                    push RBX;
                    mov R9, val;
                    mov R10, newval;

                    mov RDI, R10;
                    mov RBX, [RDI];
                    mov RCX, 8[RDI];

                    mov RDI, R9;
                    mov RAX, [RDI];
                    mov RDX, 8[RDI];

                L1: lock; // lock always needed to make this op atomic
                    cmpxchg16b [RDI];
                    jne L1;
                    pop RBX;
                    pop RDI;
                }
            }
            else
            {
                asm pure nothrow @nogc @trusted
                {
                    push RDI;
                    push RBX;
                    lea RDI, newval;
                    mov RBX, [RDI];
                    mov RCX, 8[RDI];
                    mov RDI, val;
                    mov RAX, [RDI];
                    mov RDX, 8[RDI];
                L1: lock; // lock always needed to make this op atomic
                    cmpxchg16b [RDI];
                    jne L1;
                    pop RBX;
                    pop RDI;
                }
            }
        }
        else
        {
            static assert( false, "Invalid template type specified." );
        }
    }


    void atomicFence() nothrow @nogc @safe
    {
        // SSE2 is always present in 64-bit x86 chips.
        asm nothrow @nogc @trusted
        {
            naked;

            mfence;
            ret;
        }
    }
}
else version (GNU)
{
    import gcc.builtins;

    HeadUnshared!(T) atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc @trusted
        if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) )
    {
        // binary operators
        //
        // +    -   *   /   %   ^^  &
        // |    ^   <<  >>  >>> ~   in
        // ==   !=  <   <=  >   >=
        static if ( op == "+"  || op == "-"  || op == "*"  || op == "/"   ||
                    op == "%"  || op == "^^" || op == "&"  || op == "|"   ||
                    op == "^"  || op == "<<" || op == ">>" || op == ">>>" ||
                    op == "~"  || // skip "in"
                    op == "==" || op == "!=" || op == "<"  || op == "<="  ||
                    op == ">"  || op == ">=" )
        {
            HeadUnshared!(T) get = atomicLoad!(MemoryOrder.raw)( val );
            mixin( "return get " ~ op ~ " mod;" );
        }
        else
        // assignment operators
        //
        // +=   -=  *=  /=  %=  ^^= &=
        // |=   ^=  <<= >>= >>>=    ~=
        static if ( op == "+=" || op == "-="  || op == "*="  || op == "/=" ||
                    op == "%=" || op == "^^=" || op == "&="  || op == "|=" ||
                    op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~="
        {
            HeadUnshared!(T) get, set;

            do
            {
                get = set = atomicLoad!(MemoryOrder.raw)( val );
                mixin( "set " ~ op ~ " mod;" );
            } while ( !cas( &val, get, set ) );
            return set;
        }
        else
        {
            static assert( false, "Operation not supported." );
        }
    }


    bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe
        if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) )
    {
        return casImpl(here, ifThis, writeThis);
    }

    bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe
        if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) )
    {
        return casImpl(here, ifThis, writeThis);
    }

    bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe
        if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) )
    {
        return casImpl(here, ifThis, writeThis);
    }

    private bool casImpl(T,V1,V2)( shared(T)* here, V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted
    {
        bool res = void;

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == byte.sizeof)
            {
                res = __atomic_compare_exchange_1(here, cast(void*) &ifThis, *cast(ubyte*) &writeThis,
                                                  false, MemoryOrder.seq, MemoryOrder.seq);
            }
            else static if (T.sizeof == short.sizeof)
            {
                res = __atomic_compare_exchange_2(here, cast(void*) &ifThis, *cast(ushort*) &writeThis,
                                                  false, MemoryOrder.seq, MemoryOrder.seq);
            }
            else static if (T.sizeof == int.sizeof)
            {
                res = __atomic_compare_exchange_4(here, cast(void*) &ifThis, *cast(uint*) &writeThis,
                                                  false, MemoryOrder.seq, MemoryOrder.seq);
            }
            else static if (T.sizeof == long.sizeof && GNU_Have_64Bit_Atomics)
            {
                res = __atomic_compare_exchange_8(here, cast(void*) &ifThis, *cast(ulong*) &writeThis,
                                                  false, MemoryOrder.seq, MemoryOrder.seq);
            }
            else static if (GNU_Have_LibAtomic)
            {
                res = __atomic_compare_exchange(T.sizeof, here, cast(void*) &ifThis, cast(void*) &writeThis,
                                                MemoryOrder.seq, MemoryOrder.seq);
            }
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            static if (T.sizeof == byte.sizeof)
                alias U = byte;
            else static if (T.sizeof == short.sizeof)
                alias U = short;
            else static if (T.sizeof == int.sizeof)
                alias U = int;
            else static if (T.sizeof == long.sizeof)
                alias U = long;
            else
                static assert(0, "Invalid template type specified.");

            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();

            if (*cast(U*)here == *cast(U*)&ifThis)
            {
                *here = writeThis;
                res = true;
            }
            else
                res = false;
        }

        return res;
    }


    // Memory model types for the __atomic* builtins.
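    // The enum values match GCC's __ATOMIC_RELAXED (0), __ATOMIC_ACQUIRE (2),
    // __ATOMIC_RELEASE (3), and __ATOMIC_SEQ_CST (5), so they can be passed
    // straight through to the builtins.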
    enum MemoryOrder
    {
        raw = 0,
        acq = 2,
        rel = 3,
        seq = 5,
    }

    deprecated("Please use MemoryOrder instead.")
    alias MemoryOrder msync;


    HeadUnshared!(T) atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @trusted
    if (!__traits(isFloating, T))
    {
        static assert(ms != MemoryOrder.rel, "Invalid MemoryOrder for atomicLoad");
        static assert(__traits(isPOD, T), "argument to atomicLoad() must be POD");

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
            {
                ubyte value = __atomic_load_1(&val, ms);
                return *cast(HeadUnshared!T*) &value;
            }
            else static if (T.sizeof == ushort.sizeof)
            {
                ushort value = __atomic_load_2(&val, ms);
                return *cast(HeadUnshared!T*) &value;
            }
            else static if (T.sizeof == uint.sizeof)
            {
                uint value = __atomic_load_4(&val, ms);
                return *cast(HeadUnshared!T*) &value;
            }
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
            {
                ulong value = __atomic_load_8(&val, ms);
                return *cast(HeadUnshared!T*) &value;
            }
            else static if (GNU_Have_LibAtomic)
            {
                T value;
                __atomic_load(T.sizeof, &val, cast(void*)&value, ms);
                return *cast(HeadUnshared!T*) &value;
            }
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();
            return *cast(HeadUnshared!T*)&val;
        }
    }


    void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V1)( ref shared T val, V1 newval ) pure nothrow @nogc @trusted
        if ( __traits( compiles, { val = newval; } ) )
    {
        static assert(ms != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore");
        static assert(__traits(isPOD, T), "argument to atomicStore() must be POD");

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
            {
                __atomic_store_1(&val, *cast(ubyte*) &newval, ms);
            }
            else static if (T.sizeof == ushort.sizeof)
            {
                __atomic_store_2(&val, *cast(ushort*) &newval, ms);
            }
            else static if (T.sizeof == uint.sizeof)
            {
                __atomic_store_4(&val, *cast(uint*) &newval, ms);
            }
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
            {
                __atomic_store_8(&val, *cast(ulong*) &newval, ms);
            }
            else static if (GNU_Have_LibAtomic)
            {
                __atomic_store(T.sizeof, &val, cast(void*)&newval, ms);
            }
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            val = newval;
            getAtomicMutex.unlock();
        }
    }


    void atomicFence() nothrow @nogc
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
            __atomic_thread_fence(MemoryOrder.seq);
        else
        {
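            // Taking and immediately releasing the global mutex acts as a
            // full barrier: the lock is an acquire operation and the unlock
            // a release operation.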
1522             getAtomicMutex.lock();
1523             getAtomicMutex.unlock();
1524         }
1525     }

    static if (!GNU_Have_Atomics && !GNU_Have_LibAtomic)
    {
        // Use system mutex for atomics, faking the purity of the functions so
        // that they can be used in pure/nothrow/@safe code.
        extern (C) private pure @trusted @nogc nothrow
        {
            static if (GNU_Thread_Model == ThreadModel.Posix)
            {
                import core.sys.posix.pthread;
                alias atomicMutexHandle = pthread_mutex_t;

                pragma(mangle, "pthread_mutex_init") int fakePureMutexInit(pthread_mutex_t*, pthread_mutexattr_t*);
                pragma(mangle, "pthread_mutex_lock") int fakePureMutexLock(pthread_mutex_t*);
                pragma(mangle, "pthread_mutex_unlock") int fakePureMutexUnlock(pthread_mutex_t*);
            }
            else static if (GNU_Thread_Model == ThreadModel.Win32)
            {
                import core.sys.windows.winbase;
                alias atomicMutexHandle = CRITICAL_SECTION;

                pragma(mangle, "InitializeCriticalSection") void fakePureMutexInit(CRITICAL_SECTION*);
                pragma(mangle, "EnterCriticalSection") void fakePureMutexLock(CRITICAL_SECTION*);
                pragma(mangle, "LeaveCriticalSection") void fakePureMutexUnlock(CRITICAL_SECTION*);
            }
            else
            {
                alias atomicMutexHandle = int;
            }
        }
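
        // A minimal sketch of the purity-faking idiom used above, with a
        // hypothetical C function `my_c_lock` (not a real symbol in this
        // module): pragma(mangle) binds a declaration carrying pure, nothrow,
        // and @nogc attributes to the mangled name of an impure C function,
        // so pure D code can call it.  Guarded by version (none) because it
        // is illustration only.
        version (none)
        {
            extern (C) private pure @trusted @nogc nothrow
            {
                pragma(mangle, "my_c_lock") int fakePureCLock(void*);
            }
        }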

        // Implements lock/unlock operations.
        private struct AtomicMutex
        {
            int lock() pure @trusted @nogc nothrow
            {
                static if (GNU_Thread_Model == ThreadModel.Posix)
                {
                    if (!_inited)
                    {
                        fakePureMutexInit(&_handle, null);
                        _inited = true;
                    }
                    return fakePureMutexLock(&_handle);
                }
                else
                {
                    static if (GNU_Thread_Model == ThreadModel.Win32)
                    {
                        if (!_inited)
                        {
                            fakePureMutexInit(&_handle);
                            _inited = true;
                        }
                        fakePureMutexLock(&_handle);
                    }
                    return 0;
                }
            }

            int unlock() pure @trusted @nogc nothrow
            {
                static if (GNU_Thread_Model == ThreadModel.Posix)
                    return fakePureMutexUnlock(&_handle);
                else
                {
                    static if (GNU_Thread_Model == ThreadModel.Win32)
                        fakePureMutexUnlock(&_handle);
                    return 0;
                }
            }

        private:
            atomicMutexHandle _handle;
            bool _inited;
        }

        // Returns a pointer to the global mutex guarding the fallback atomics.
        private AtomicMutex* _getAtomicMutex() @trusted @nogc nothrow
        {
            __gshared static AtomicMutex mutex;
            return &mutex;
        }

        // Pure alias for _getAtomicMutex.
        pragma(mangle, _getAtomicMutex.mangleof)
        private AtomicMutex* getAtomicMutex() pure @trusted @nogc nothrow @property;
    }
}

// This is an ABI adapter that works on all architectures.  It type-puns
// floats and doubles to ints and longs, atomically loads them, then puns
// them back.  This is necessary so that they are returned in floating-point
// registers instead of integer registers.
HeadUnshared!(T) atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @trusted
if (__traits(isFloating, T))
{
    static if (T.sizeof == int.sizeof)
    {
        static assert(is(T : float));
        auto ptr = cast(const shared int*) &val;
        auto asInt = atomicLoad!(ms)(*ptr);
        return *(cast(typeof(return)*) &asInt);
    }
    else static if (T.sizeof == long.sizeof)
    {
        static assert(is(T : double));
        auto ptr = cast(const shared long*) &val;
        auto asLong = atomicLoad!(ms)(*ptr);
        return *(cast(typeof(return)*) &asLong);
    }
    else
    {
        static assert(0, "Cannot atomically load 80-bit reals.");
    }
}
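
// A minimal usage sketch: with the adapter above, a shared float is loaded
// atomically just like an int, and the bits are punned back to float.
pure nothrow @nogc @safe unittest
{
    shared float f = 3.25f;  // exactly representable, so == is safe
    assert(atomicLoad(f) == 3.25f);
}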

////////////////////////////////////////////////////////////////////////////////
// Unit Tests
////////////////////////////////////////////////////////////////////////////////


version (unittest)
{
    void testCAS(T)( T val ) pure nothrow @nogc @trusted
    in
    {
        assert(val !is T.init);
    }
    body
    {
        T         base = cast(T)null;
        shared(T) atom = cast(shared(T))null;

        assert( base !is val, T.stringof );
        assert( atom is base, T.stringof );

        assert( cas( &atom, base, val ), T.stringof );
        assert( atom is val, T.stringof );
        assert( !cas( &atom, base, base ), T.stringof );
        assert( atom is val, T.stringof );
    }

    void testLoadStore(MemoryOrder ms = MemoryOrder.seq, T)( T val = T.init + 1 ) pure nothrow @nogc @trusted
    {
        T         base = cast(T) 0;
        shared(T) atom = cast(T) 0;

        assert( base !is val );
        assert( atom is base );
        atomicStore!(ms)( atom, val );
        base = atomicLoad!(ms)( atom );

        assert( base is val, T.stringof );
        assert( atom is val );
    }


    void testType(T)( T val = T.init + 1 ) pure nothrow @nogc @safe
    {
        testCAS!(T)( val );
        testLoadStore!(MemoryOrder.seq, T)( val );
        testLoadStore!(MemoryOrder.raw, T)( val );
    }

    @safe pure nothrow unittest
    {
        testType!(bool)();

        testType!(byte)();
        testType!(ubyte)();

        testType!(short)();
        testType!(ushort)();

        testType!(int)();
        testType!(uint)();

        testType!(shared int*)();

        static class Klass {}
        testCAS!(shared Klass)( new shared(Klass) );

        testType!(float)(1.0f);

        static if ( has64BitCAS )
        {
            testType!(double)(1.0);
            testType!(long)();
            testType!(ulong)();
        }

        shared(size_t) i;

        atomicOp!"+="( i, cast(size_t) 1 );
        assert( i == 1 );

        atomicOp!"-="( i, cast(size_t) 1 );
        assert( i == 0 );

        shared float f = 0;
        atomicOp!"+="( f, 1 );
        assert( f == 1 );

        static if ( has64BitCAS )
        {
            shared double d = 0;
            atomicOp!"+="( d, 1 );
            assert( d == 1 );
        }
    }

    pure nothrow unittest
    {
        static if (has128BitCAS)
        {
            struct DoubleValue
            {
                long value1;
                long value2;
            }

            align(16) shared DoubleValue a;
            atomicStore(a, DoubleValue(1,2));
            assert(a.value1 == 1 && a.value2 == 2);

            while (!cas(&a, DoubleValue(1,2), DoubleValue(3,4))){}
            assert(a.value1 == 3 && a.value2 == 4);

            align(16) DoubleValue b = atomicLoad(a);
            assert(b.value1 == 3 && b.value2 == 4);
        }

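        // Double-wide CAS spans two machine words: 128-bit on 64-bit targets,
        // 64-bit otherwise.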
        version (D_LP64)
        {
            enum hasDWCAS = has128BitCAS;
        }
        else
        {
            enum hasDWCAS = has64BitCAS;
        }

        static if (hasDWCAS)
        {
            static struct List { size_t gen; List* next; }
            shared(List) head;
            assert(cas(&head, shared(List)(0, null), shared(List)(1, cast(List*)1)));
            assert(head.gen == 1);
            assert(cast(size_t)head.next == 1);
        }
    }

    pure nothrow unittest
    {
        static struct S { int val; }
        auto s = shared(S)(1);

        shared(S*) ptr;

        // head unshared
        shared(S)* ifThis = null;
        shared(S)* writeThis = &s;
        assert(ptr is null);
        assert(cas(&ptr, ifThis, writeThis));
        assert(ptr is writeThis);

        // head shared
        shared(S*) ifThis2 = writeThis;
        shared(S*) writeThis2 = null;
        assert(cas(&ptr, ifThis2, writeThis2));
        assert(ptr is null);

        // cas must not compile when the target pointer itself is not shared
        shared(S)* ptr2;
        static assert(!__traits(compiles, cas(&ptr2, ifThis, writeThis)));
        static assert(!__traits(compiles, cas(&ptr2, ifThis2, writeThis2)));
    }
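
    // Note on the test above: cas requires the destination itself to be
    // shared; the comparison and store values may be head-shared or
    // head-unshared, but a head-unshared destination is rejected at
    // compile time.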

    unittest
    {
        import core.thread;

        // Use heap memory to ensure an optimizing
        // compiler doesn't put things in registers.
        uint* x = new uint();
        bool* f = new bool();
        uint* r = new uint();

        auto thr = new Thread(()
        {
            while (!*f)
            {
            }

            atomicFence();

            *r = *x;
        });

        thr.start();

        *x = 42;

        atomicFence();

        *f = true;

        atomicFence();

        thr.join();

        assert(*r == 42);
    }

    // ==== atomicFetchAdd and atomicFetchSub operations ====
    pure nothrow @nogc @safe unittest
    {
        shared ubyte u8 = 1;
        shared ushort u16 = 2;
        shared uint u32 = 3;
        shared byte i8 = 5;
        shared short i16 = 6;
        shared int i32 = 7;

        assert(atomicOp!"+="(u8, 8) == 9);
        assert(atomicOp!"+="(u16, 8) == 10);
        assert(atomicOp!"+="(u32, 8) == 11);
        assert(atomicOp!"+="(i8, 8) == 13);
        assert(atomicOp!"+="(i16, 8) == 14);
        assert(atomicOp!"+="(i32, 8) == 15);
        version (AsmX86_64)
        {
            shared ulong u64 = 4;
            shared long i64 = 8;
            assert(atomicOp!"+="(u64, 8) == 12);
            assert(atomicOp!"+="(i64, 8) == 16);
        }
    }

    pure nothrow @nogc @safe unittest
    {
        shared ubyte u8 = 1;
        shared ushort u16 = 2;
        shared uint u32 = 3;
        shared byte i8 = 5;
        shared short i16 = 6;
        shared int i32 = 7;

        assert(atomicOp!"-="(u8, 1) == 0);
        assert(atomicOp!"-="(u16, 1) == 1);
        assert(atomicOp!"-="(u32, 1) == 2);
        assert(atomicOp!"-="(i8, 1) == 4);
        assert(atomicOp!"-="(i16, 1) == 5);
        assert(atomicOp!"-="(i32, 1) == 6);
        version (AsmX86_64)
        {
            shared ulong u64 = 4;
            shared long i64 = 8;
            assert(atomicOp!"-="(u64, 1) == 3);
            assert(atomicOp!"-="(i64, 1) == 7);
        }
    }

    pure nothrow @nogc @safe unittest // issue 16651: rhs narrower than target
    {
        shared ulong a = 2;
        uint b = 1;
        atomicOp!"-="( a, b );
        assert(a == 1);

        shared uint c = 2;
        ubyte d = 1;
        atomicOp!"-="( c, d );
        assert(c == 1);
    }
}