/*
 * Distributed under the Boost Software License, Version 1.0.
 * (See accompanying file LICENSE_1_0.txt or copy at
 * http://www.boost.org/LICENSE_1_0.txt)
 *
 * Copyright (c) 2009 Helge Bahmann
 * Copyright (c) 2012 Tim Blechmann
 * Copyright (c) 2014 Andrey Semashev
 */
/*!
 * \file   atomic/detail/core_arch_ops_msvc_x86.hpp
 *
 * This header contains implementation of the \c core_arch_operations template.
 */

#ifndef BOOST_ATOMIC_DETAIL_CORE_ARCH_OPS_MSVC_X86_HPP_INCLUDED_
#define BOOST_ATOMIC_DETAIL_CORE_ARCH_OPS_MSVC_X86_HPP_INCLUDED_

#include <cstddef>
#include <boost/cstdint.hpp>
#include <boost/memory_order.hpp>
#include <boost/atomic/detail/config.hpp>
#include <boost/atomic/detail/intptr.hpp>
#include <boost/atomic/detail/interlocked.hpp>
#include <boost/atomic/detail/storage_traits.hpp>
#include <boost/atomic/detail/core_arch_operations_fwd.hpp>
#include <boost/atomic/detail/type_traits/make_signed.hpp>
#include <boost/atomic/detail/capabilities.hpp>
#if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) || defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B)
#include <boost/atomic/detail/cas_based_exchange.hpp>
#include <boost/atomic/detail/core_ops_cas_based.hpp>
#endif
#include <boost/atomic/detail/ops_msvc_common.hpp>
#if !defined(_M_IX86) && !(defined(BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE8) && defined(BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE16))
#include <boost/atomic/detail/extending_cas_based_arithmetic.hpp>
#endif
#include <boost/atomic/detail/header.hpp>

#ifdef BOOST_HAS_PRAGMA_ONCE
#pragma once
#endif

namespace boost {
namespace atomics {
namespace detail {

/*
 * Implementation note for asm blocks.
 *
 * http://msdn.microsoft.com/en-us/data/k1a8ss06%28v=vs.105%29
 *
 * Some SSE types require eight-byte stack alignment, forcing the compiler to emit dynamic stack-alignment code.
 * To be able to access both the local variables and the function parameters after the alignment, the compiler
 * maintains two frame pointers. If the compiler performs frame pointer omission (FPO), it will use EBP and ESP.
 * If the compiler does not perform FPO, it will use EBX and EBP. To ensure code runs correctly, do not modify EBX
 * in asm code if the function requires dynamic stack alignment as it could modify the frame pointer.
 * Either move the eight-byte aligned types out of the function, or avoid using EBX.
 *
 * Since we have no way of knowing that the compiler uses FPO, we have to always save and restore ebx
 * whenever we have to clobber it. Additionally, we disable warning C4731 in header.hpp so that the compiler
 * doesn't spam about ebx use.
 */
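//
// A minimal sketch of the resulting save/restore idiom (illustrative only;
// the asm blocks further below follow this exact pattern):
//
//     uint32_t backup;
//     __asm
//     {
//         mov backup, ebx    // preserve ebx, which may be the frame pointer
//         ...                // instructions that clobber ebx, e.g. cmpxchg8b
//         mov ebx, backup    // restore it before leaving the block
//     };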

struct core_arch_operations_msvc_x86_base
{
    static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = false;
    static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;

    static BOOST_FORCEINLINE void fence_before(memory_order) BOOST_NOEXCEPT
    {
        BOOST_ATOMIC_DETAIL_COMPILER_BARRIER();
    }

    static BOOST_FORCEINLINE void fence_after(memory_order) BOOST_NOEXCEPT
    {
        BOOST_ATOMIC_DETAIL_COMPILER_BARRIER();
    }

    static BOOST_FORCEINLINE void fence_after_load(memory_order) BOOST_NOEXCEPT
    {
        BOOST_ATOMIC_DETAIL_COMPILER_BARRIER();

        // On x86 and x86_64 there is no need for a hardware barrier,
        // even if seq_cst memory order is requested, because all
        // seq_cst writes are implemented with lock-prefixed operations
        // or xchg, which has an implied lock prefix. Therefore normal loads
        // are already ordered with seq_cst stores on these architectures.
    }
};
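// Note: the store() implementation below relies on the property described in
// fence_after_load() above: seq_cst stores are routed through Derived::exchange(),
// which compiles to xchg or another lock-prefixed instruction.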

template< std::size_t Size, bool Signed, bool Interprocess, typename Derived >
struct core_arch_operations_msvc_x86 :
    public core_arch_operations_msvc_x86_base
{
    typedef typename storage_traits< Size >::type storage_type;

    static BOOST_CONSTEXPR_OR_CONST std::size_t storage_size = Size;
    static BOOST_CONSTEXPR_OR_CONST std::size_t storage_alignment = storage_traits< Size >::alignment;
    static BOOST_CONSTEXPR_OR_CONST bool is_signed = Signed;
    static BOOST_CONSTEXPR_OR_CONST bool is_interprocess = Interprocess;

    static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        if (order != memory_order_seq_cst)
        {
            fence_before(order);
            storage = v;
            fence_after(order);
        }
        else
        {
            Derived::exchange(storage, v, order);
        }
    }

    static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order order) BOOST_NOEXCEPT
    {
        storage_type v = storage;
        fence_after_load(order);
        return v;
    }

    static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        typedef typename boost::atomics::detail::make_signed< storage_type >::type signed_storage_type;
        return Derived::fetch_add(storage, static_cast< storage_type >(-static_cast< signed_storage_type >(v)), order);
    }
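    // Note: the cast-based negation above is modular arithmetic on the unsigned
    // storage_type. For example, with a hypothetical 8-bit storage_type,
    // fetch_sub(s, 1) becomes fetch_add(s, 0xFF), which yields the same result
    // as subtracting 1 modulo 256.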

    static BOOST_FORCEINLINE bool compare_exchange_weak(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
    {
        return Derived::compare_exchange_strong(storage, expected, desired, success_order, failure_order);
    }

    static BOOST_FORCEINLINE bool test_and_set(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT
    {
        return !!Derived::exchange(storage, (storage_type)1, order);
    }

    static BOOST_FORCEINLINE void clear(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT
    {
        store(storage, (storage_type)0, order);
    }
};
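
// The Derived template parameter above implements the curiously recurring template
// pattern (CRTP): this base supplies the generic operations (store, load, fetch_sub,
// compare_exchange_weak, ...) in terms of the size-specific primitives (exchange,
// fetch_add, compare_exchange_strong) defined by the most derived class. A minimal
// sketch of the dispatch, with hypothetical names:
//
//     template< typename Derived >
//     struct ops_base
//     {
//         static int fetch_sub(int volatile& s, int v)
//         {
//             return Derived::fetch_add(s, -v); // resolved at compile time, no vtable
//         }
//     };
//
//     struct ops : ops_base< ops >
//     {
//         static int fetch_add(int volatile& s, int v); // the real primitive
//     };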

template< bool Signed, bool Interprocess >
struct core_arch_operations< 4u, Signed, Interprocess > :
    public core_arch_operations_msvc_x86< 4u, Signed, Interprocess, core_arch_operations< 4u, Signed, Interprocess > >
{
    typedef core_arch_operations_msvc_x86< 4u, Signed, Interprocess, core_arch_operations< 4u, Signed, Interprocess > > base_type;
    typedef typename base_type::storage_type storage_type;

    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_EXCHANGE_ADD(&storage, v));
    }

    static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_EXCHANGE(&storage, v));
    }

    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
    {
        storage_type previous = expected;
        storage_type old_val = static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE(&storage, desired, previous));
        expected = old_val;
        return (previous == old_val);
    }
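    // BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE typically expands to the
    // _InterlockedCompareExchange intrinsic, which returns the value observed at the
    // destination before the operation; the exchange succeeded if and only if that
    // value equals the comparand, hence the (previous == old_val) test above.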

#if defined(BOOST_ATOMIC_INTERLOCKED_AND)
    static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_AND(&storage, v));
    }
#else
    static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type res = storage;
        while (!compare_exchange_strong(storage, res, res & v, order, memory_order_relaxed)) {}
        return res;
    }
#endif
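    // Note on the CAS-loop fallback above: on failure, compare_exchange_strong stores
    // the freshly observed value back into res, so each retry recomputes (res & v)
    // against up-to-date contents without an explicit reload of storage.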

#if defined(BOOST_ATOMIC_INTERLOCKED_OR)
    static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_OR(&storage, v));
    }
#else
    static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type res = storage;
        while (!compare_exchange_strong(storage, res, res | v, order, memory_order_relaxed)) {}
        return res;
    }
#endif

#if defined(BOOST_ATOMIC_INTERLOCKED_XOR)
    static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_XOR(&storage, v));
    }
#else
    static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type res = storage;
        while (!compare_exchange_strong(storage, res, res ^ v, order, memory_order_relaxed)) {}
        return res;
    }
#endif
};

#if defined(BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE8)

template< bool Signed, bool Interprocess >
struct core_arch_operations< 1u, Signed, Interprocess > :
    public core_arch_operations_msvc_x86< 1u, Signed, Interprocess, core_arch_operations< 1u, Signed, Interprocess > >
{
    typedef core_arch_operations_msvc_x86< 1u, Signed, Interprocess, core_arch_operations< 1u, Signed, Interprocess > > base_type;
    typedef typename base_type::storage_type storage_type;

    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_EXCHANGE_ADD8(&storage, v));
    }

    static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_EXCHANGE8(&storage, v));
    }

    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
    {
        storage_type previous = expected;
        storage_type old_val = static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE8(&storage, desired, previous));
        expected = old_val;
        return (previous == old_val);
    }

    static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_AND8(&storage, v));
    }

    static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_OR8(&storage, v));
    }

    static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_XOR8(&storage, v));
    }
};

#elif defined(_M_IX86)

template< bool Signed, bool Interprocess >
struct core_arch_operations< 1u, Signed, Interprocess > :
    public core_arch_operations_msvc_x86< 1u, Signed, Interprocess, core_arch_operations< 1u, Signed, Interprocess > >
{
    typedef core_arch_operations_msvc_x86< 1u, Signed, Interprocess, core_arch_operations< 1u, Signed, Interprocess > > base_type;
    typedef typename base_type::storage_type storage_type;

    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        base_type::fence_before(order);
        __asm
        {
            mov edx, storage
            movzx eax, v
            lock xadd byte ptr [edx], al
            mov v, al
        };
        base_type::fence_after(order);
        return v;
    }

    static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        base_type::fence_before(order);
        __asm
        {
            mov edx, storage
            movzx eax, v
            xchg byte ptr [edx], al
            mov v, al
        };
        base_type::fence_after(order);
        return v;
    }

    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order) BOOST_NOEXCEPT
    {
        base_type::fence_before(success_order);
        bool success;
        __asm
        {
            mov esi, expected
            mov edi, storage
            movzx eax, byte ptr [esi]
            movzx edx, desired
            lock cmpxchg byte ptr [edi], dl
            mov byte ptr [esi], al
            sete success
        };
        // The success and failure fences are equivalent anyway
        base_type::fence_after(success_order);
        return success;
    }

    static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        base_type::fence_before(order);
        __asm
        {
            mov edi, storage
            movzx ecx, v
            xor edx, edx
            movzx eax, byte ptr [edi]
            align 16
        again:
            mov dl, al
            and dl, cl
            lock cmpxchg byte ptr [edi], dl
            jne again
            mov v, al
        };
        base_type::fence_after(order);
        return v;
    }

    static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        base_type::fence_before(order);
        __asm
        {
            mov edi, storage
            movzx ecx, v
            xor edx, edx
            movzx eax, byte ptr [edi]
            align 16
        again:
            mov dl, al
            or dl, cl
            lock cmpxchg byte ptr [edi], dl
            jne again
            mov v, al
        };
        base_type::fence_after(order);
        return v;
    }

    static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        base_type::fence_before(order);
        __asm
        {
            mov edi, storage
            movzx ecx, v
            xor edx, edx
            movzx eax, byte ptr [edi]
            align 16
        again:
            mov dl, al
            xor dl, cl
            lock cmpxchg byte ptr [edi], dl
            jne again
            mov v, al
        };
        base_type::fence_after(order);
        return v;
    }
};

#else

template< bool Signed, bool Interprocess >
struct core_arch_operations< 1u, Signed, Interprocess > :
    public extending_cas_based_arithmetic< core_arch_operations< 4u, Signed, Interprocess >, 1u, Signed >
{
};

#endif

#if defined(BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE16)

template< bool Signed, bool Interprocess >
struct core_arch_operations< 2u, Signed, Interprocess > :
    public core_arch_operations_msvc_x86< 2u, Signed, Interprocess, core_arch_operations< 2u, Signed, Interprocess > >
{
    typedef core_arch_operations_msvc_x86< 2u, Signed, Interprocess, core_arch_operations< 2u, Signed, Interprocess > > base_type;
    typedef typename base_type::storage_type storage_type;

    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_EXCHANGE_ADD16(&storage, v));
    }

    static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_EXCHANGE16(&storage, v));
    }

    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
    {
        storage_type previous = expected;
        storage_type old_val = static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE16(&storage, desired, previous));
        expected = old_val;
        return (previous == old_val);
    }

    static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_AND16(&storage, v));
    }

    static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_OR16(&storage, v));
    }

    static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_XOR16(&storage, v));
    }
};

#elif defined(_M_IX86)

template< bool Signed, bool Interprocess >
struct core_arch_operations< 2u, Signed, Interprocess > :
    public core_arch_operations_msvc_x86< 2u, Signed, Interprocess, core_arch_operations< 2u, Signed, Interprocess > >
{
    typedef core_arch_operations_msvc_x86< 2u, Signed, Interprocess, core_arch_operations< 2u, Signed, Interprocess > > base_type;
    typedef typename base_type::storage_type storage_type;

    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        base_type::fence_before(order);
        __asm
        {
            mov edx, storage
            movzx eax, v
            lock xadd word ptr [edx], ax
            mov v, ax
        };
        base_type::fence_after(order);
        return v;
    }

    static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        base_type::fence_before(order);
        __asm
        {
            mov edx, storage
            movzx eax, v
            xchg word ptr [edx], ax
            mov v, ax
        };
        base_type::fence_after(order);
        return v;
    }

    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order) BOOST_NOEXCEPT
    {
        base_type::fence_before(success_order);
        bool success;
        __asm
        {
            mov esi, expected
            mov edi, storage
            movzx eax, word ptr [esi]
            movzx edx, desired
            lock cmpxchg word ptr [edi], dx
            mov word ptr [esi], ax
            sete success
        };
        // The success and failure fences are equivalent anyway
        base_type::fence_after(success_order);
        return success;
    }

    static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        base_type::fence_before(order);
        __asm
        {
            mov edi, storage
            movzx ecx, v
            xor edx, edx
            movzx eax, word ptr [edi]
            align 16
        again:
            mov dx, ax
            and dx, cx
            lock cmpxchg word ptr [edi], dx
            jne again
            mov v, ax
        };
        base_type::fence_after(order);
        return v;
    }

    static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        base_type::fence_before(order);
        __asm
        {
            mov edi, storage
            movzx ecx, v
            xor edx, edx
            movzx eax, word ptr [edi]
            align 16
        again:
            mov dx, ax
            or dx, cx
            lock cmpxchg word ptr [edi], dx
            jne again
            mov v, ax
        };
        base_type::fence_after(order);
        return v;
    }

    static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        base_type::fence_before(order);
        __asm
        {
            mov edi, storage
            movzx ecx, v
            xor edx, edx
            movzx eax, word ptr [edi]
            align 16
        again:
            mov dx, ax
            xor dx, cx
            lock cmpxchg word ptr [edi], dx
            jne again
            mov v, ax
        };
        base_type::fence_after(order);
        return v;
    }
};

#else

template< bool Signed, bool Interprocess >
struct core_arch_operations< 2u, Signed, Interprocess > :
    public extending_cas_based_arithmetic< core_arch_operations< 4u, Signed, Interprocess >, 2u, Signed >
{
};

#endif


#if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B)

template< bool Signed, bool Interprocess >
struct msvc_dcas_x86
{
    typedef typename storage_traits< 8u >::type storage_type;

    static BOOST_CONSTEXPR_OR_CONST bool is_interprocess = Interprocess;
    static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true;
    static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;

    static BOOST_CONSTEXPR_OR_CONST std::size_t storage_size = 8u;
    static BOOST_CONSTEXPR_OR_CONST std::size_t storage_alignment = 8u;
    static BOOST_CONSTEXPR_OR_CONST bool is_signed = Signed;

    // Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3A, 8.1.1. Guaranteed Atomic Operations:
    //
    // The Pentium processor (and newer processors since) guarantees that the following additional memory operations will always be carried out atomically:
    // * Reading or writing a quadword aligned on a 64-bit boundary
    //
    // Luckily, the memory is almost always 8-byte aligned in our case because atomic<> uses 64 bit native types for storage and dynamic memory allocations
    // have at least 8 byte alignment. The only unfortunate case is when atomic is placed on the stack and it is not 8-byte aligned (like on 32 bit Windows).
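    //
    // For example, a 64-bit atomic placed on the stack of a 32-bit Windows program may
    // be only 4-byte aligned, which is why store() and load() below test the address at
    // run time and fall back to lock cmpxchg8b for misaligned storage.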

    static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        BOOST_ATOMIC_DETAIL_COMPILER_BARRIER();

        storage_type volatile* p = &storage;
        if (((uintptr_t)p & 0x00000007) == 0)
        {
#if defined(_M_IX86_FP) && _M_IX86_FP >= 2
#if defined(__AVX__)
            __asm
            {
                mov edx, p
                vmovq xmm4, v
                vmovq qword ptr [edx], xmm4
            };
#else
            __asm
            {
                mov edx, p
                movq xmm4, v
                movq qword ptr [edx], xmm4
            };
#endif
#else
            __asm
            {
                mov edx, p
                fild v
                fistp qword ptr [edx]
            };
#endif
        }
        else
        {
            uint32_t backup;
            __asm
            {
                mov backup, ebx
                mov edi, p
                mov ebx, dword ptr [v]
                mov ecx, dword ptr [v + 4]
                mov eax, dword ptr [edi]
                mov edx, dword ptr [edi + 4]
                align 16
            again:
                lock cmpxchg8b qword ptr [edi]
                jne again
                mov ebx, backup
            };
        }

        BOOST_ATOMIC_DETAIL_COMPILER_BARRIER();
    }

    static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT
    {
        BOOST_ATOMIC_DETAIL_COMPILER_BARRIER();

        storage_type const volatile* p = &storage;
        storage_type value;

        if (((uintptr_t)p & 0x00000007) == 0)
        {
#if defined(_M_IX86_FP) && _M_IX86_FP >= 2
#if defined(__AVX__)
            __asm
            {
                mov edx, p
                vmovq xmm4, qword ptr [edx]
                vmovq value, xmm4
            };
#else
            __asm
            {
                mov edx, p
                movq xmm4, qword ptr [edx]
                movq value, xmm4
            };
#endif
#else
            __asm
            {
                mov edx, p
                fild qword ptr [edx]
                fistp value
            };
#endif
        }
        else
        {
            // We don't care for comparison result here; the previous value will be stored into value anyway.
            // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b.
            __asm
            {
                mov edi, p
                mov eax, ebx
                mov edx, ecx
                lock cmpxchg8b qword ptr [edi]
                mov dword ptr [value], eax
                mov dword ptr [value + 4], edx
            };
        }

        BOOST_ATOMIC_DETAIL_COMPILER_BARRIER();

        return value;
    }

    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
    {
        // MSVC-11 in 32-bit mode sometimes generates messed up code without compiler barriers,
        // even though the _InterlockedCompareExchange64 intrinsic already provides one.
        BOOST_ATOMIC_DETAIL_COMPILER_BARRIER();

        storage_type volatile* p = &storage;
#if defined(BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE64)
        const storage_type old_val = (storage_type)BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE64(p, desired, expected);
        const bool result = (old_val == expected);
        expected = old_val;
#else
        bool result;
        uint32_t backup;
        __asm
        {
            mov backup, ebx
            mov edi, p
            mov esi, expected
            mov ebx, dword ptr [desired]
            mov ecx, dword ptr [desired + 4]
            mov eax, dword ptr [esi]
            mov edx, dword ptr [esi + 4]
            lock cmpxchg8b qword ptr [edi]
            mov dword ptr [esi], eax
            mov dword ptr [esi + 4], edx
            mov ebx, backup
            sete result
        };
#endif
        BOOST_ATOMIC_DETAIL_COMPILER_BARRIER();

        return result;
    }

    static BOOST_FORCEINLINE bool compare_exchange_weak(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
    {
        return compare_exchange_strong(storage, expected, desired, success_order, failure_order);
    }

    static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        BOOST_ATOMIC_DETAIL_COMPILER_BARRIER();

        storage_type volatile* p = &storage;
        uint32_t backup;
        __asm
        {
            mov backup, ebx
            mov edi, p
            mov ebx, dword ptr [v]
            mov ecx, dword ptr [v + 4]
            mov eax, dword ptr [edi]
            mov edx, dword ptr [edi + 4]
            align 16
        again:
            lock cmpxchg8b qword ptr [edi]
            jne again
            mov ebx, backup
            mov dword ptr [v], eax
            mov dword ptr [v + 4], edx
        };

        BOOST_ATOMIC_DETAIL_COMPILER_BARRIER();

        return v;
    }
};

template< bool Signed, bool Interprocess >
struct core_arch_operations< 8u, Signed, Interprocess > :
    public core_operations_cas_based< msvc_dcas_x86< Signed, Interprocess > >
{
};

#elif defined(_M_AMD64)

template< bool Signed, bool Interprocess >
struct core_arch_operations< 8u, Signed, Interprocess > :
    public core_arch_operations_msvc_x86< 8u, Signed, Interprocess, core_arch_operations< 8u, Signed, Interprocess > >
{
    typedef core_arch_operations_msvc_x86< 8u, Signed, Interprocess, core_arch_operations< 8u, Signed, Interprocess > > base_type;
    typedef typename base_type::storage_type storage_type;

    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_EXCHANGE_ADD64(&storage, v));
    }

    static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_EXCHANGE64(&storage, v));
    }

    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
    {
        storage_type previous = expected;
        storage_type old_val = static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE64(&storage, desired, previous));
        expected = old_val;
        return (previous == old_val);
    }

    static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_AND64(&storage, v));
    }

    static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_OR64(&storage, v));
    }

    static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        return static_cast< storage_type >(BOOST_ATOMIC_INTERLOCKED_XOR64(&storage, v));
    }
};

#endif

#if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B)

template< bool Signed, bool Interprocess >
struct msvc_dcas_x86_64
{
    typedef typename storage_traits< 16u >::type storage_type;

    static BOOST_CONSTEXPR_OR_CONST bool is_interprocess = Interprocess;
    static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true;
    static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;

    static BOOST_CONSTEXPR_OR_CONST std::size_t storage_size = 16u;
    static BOOST_CONSTEXPR_OR_CONST std::size_t storage_alignment = 16u;
    static BOOST_CONSTEXPR_OR_CONST bool is_signed = Signed;

    static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
    {
        storage_type value = const_cast< storage_type& >(storage);
        while (!BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE128(&storage, v, &value)) {}
    }
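    // The loop above is a CAS-based store: BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE128
    // (typically the _InterlockedCompareExchange128 intrinsic) writes the observed
    // contents back into the comparand on failure, so every retry compares against
    // fresh data until the 16-byte exchange succeeds.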

    static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT
    {
        storage_type value = storage_type();
        BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE128(&storage, value, &value);
        return value;
    }
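    // A 16-byte load is likewise expressed as a CAS with a zero comparand: if the
    // storage happens to contain zero, the exchange writes zero back (a no-op), and
    // in either case the intrinsic leaves the observed contents in value. Note that
    // this means even pure loads perform a locked read-modify-write on the cache line.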

    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
    {
        return !!BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE128(&storage, desired, &expected);
    }

    static BOOST_FORCEINLINE bool compare_exchange_weak(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
    {
        return compare_exchange_strong(storage, expected, desired, success_order, failure_order);
    }
};

template< bool Signed, bool Interprocess >
struct core_arch_operations< 16u, Signed, Interprocess > :
    public core_operations_cas_based< cas_based_exchange< msvc_dcas_x86_64< Signed, Interprocess > > >
{
};

#endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B)

} // namespace detail
} // namespace atomics
} // namespace boost

#include <boost/atomic/detail/footer.hpp>

#endif // BOOST_ATOMIC_DETAIL_CORE_ARCH_OPS_MSVC_X86_HPP_INCLUDED_