1 /*
2     Copyright (c) 2005-2017 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 
16 
17 
18 
19 */
20 
21 #if !defined(__TBB_machine_H) || defined(__TBB_machine_msvc_ia32_common_H)
22 #error Do not #include this internal file directly; use public TBB headers instead.
23 #endif
24 
25 #define __TBB_machine_msvc_ia32_common_H
26 
27 #include <intrin.h>
28 
// Records whether MSVC-style inline assembler (__asm blocks) may be used by the code below.
//TODO: consider moving this macro to tbb_config.h and using where MSVC asm is used
#if _M_X64 && !__INTEL_COMPILER
    // MSVC in x64 mode does not accept inline assembler
    #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 0
    #define __TBB_NO_X86_MSVC_INLINE_ASM_MSG "The compiler being used is not supported (outdated?)"
#else
    // Any non-x64 target, and the Intel compiler regardless of target
    #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 1
#endif
37 
// Word-size helpers:
//   __TBB_r(ax)  -> rax on x64, eax otherwise (register name for inline assembler)
//   __TBB_W(f)   -> f64 on x64, f otherwise   (name of the word-sized intrinsic)
#if _M_X64
    #define __TBB_r(reg_name) r##reg_name
    #define __TBB_W(name) name##64
#else
    #define __TBB_r(reg_name) e##reg_name
    #define __TBB_W(name) name
#endif
    namespace tbb { namespace internal { namespace msvc_intrinsics {
        // Operand type used when calling the __TBB_W(...) intrinsics.
#if _M_X64
        typedef __int64 word;
#else
        typedef long word;
#endif
    }}}
51 
#if _MSC_VER>=1600 && (!__INTEL_COMPILER || __INTEL_COMPILER>=1310)
    // Generates __TBB_machine_cmpswp<BYTES>, __TBB_machine_fetchadd<BYTES> and
    // __TBB_machine_fetchstore<BYTES> as thin wrappers over the _Interlocked* intrinsics.
    //   BYTES - operand size in bytes (suffix of the generated function names)
    //   SFX   - suffix of the intrinsics for that size (empty for the 4-byte ones)
    //   RET_T - type the intrinsic operates on; also the return type of the wrappers
    //   ARG_T - type of the value arguments taken by the wrappers
    #define __TBB_MACHINE_DEFINE_ATOMICS(BYTES,SFX,RET_T,ARG_T)                                             \
    __pragma(intrinsic( _InterlockedCompareExchange##SFX ))                                                 \
    __pragma(intrinsic( _InterlockedExchangeAdd##SFX ))                                                     \
    __pragma(intrinsic( _InterlockedExchange##SFX ))                                                        \
    static inline RET_T __TBB_machine_cmpswp##BYTES ( volatile void * ptr, ARG_T value, ARG_T comparand ) { \
        return _InterlockedCompareExchange##SFX ( (RET_T*)ptr, value, comparand );                          \
    }                                                                                                       \
    static inline RET_T __TBB_machine_fetchadd##BYTES ( volatile void * ptr, ARG_T addend ) {               \
        return _InterlockedExchangeAdd##SFX ( (RET_T*)ptr, addend );                                        \
    }                                                                                                       \
    static inline RET_T __TBB_machine_fetchstore##BYTES ( volatile void * ptr, ARG_T value ) {              \
        return _InterlockedExchange##SFX ( (RET_T*)ptr, value );                                            \
    }

    // Atomic intrinsics for 1, 2, and 4 bytes are available for x86 & x64
    __TBB_MACHINE_DEFINE_ATOMICS(1,8,char,__int8)
    __TBB_MACHINE_DEFINE_ATOMICS(2,16,short,__int16)
    __TBB_MACHINE_DEFINE_ATOMICS(4,,long,__int32)

    // The 8-byte flavour is generated only when the machine word is 8 bytes wide
    #if __TBB_WORDSIZE==8
    __TBB_MACHINE_DEFINE_ATOMICS(8,64,__int64,__int64)
    #endif

    // The generator is local to this header; only the marker macro stays defined
    #undef __TBB_MACHINE_DEFINE_ATOMICS
    #define __TBB_ATOMIC_PRIMITIVES_DEFINED
#endif /*_MSC_VER>=1600*/
80 
81 #if _MSC_VER>=1300 || __INTEL_COMPILER>=1100
82     #pragma intrinsic(_ReadWriteBarrier)
83     #pragma intrinsic(_mm_mfence)
84     #define __TBB_compiler_fence()    _ReadWriteBarrier()
85     #define __TBB_full_memory_fence() _mm_mfence()
86 #elif __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
87     #define __TBB_compiler_fence()    __asm { __asm nop }
88     #define __TBB_full_memory_fence() __asm { __asm mfence }
89 #else
90     #error Unsupported compiler; define __TBB_{control,acquire,release}_consistency_helper to support it
91 #endif
92 
93 #define __TBB_control_consistency_helper() __TBB_compiler_fence()
94 #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
95 #define __TBB_release_consistency_helper() __TBB_compiler_fence()
96 
97 #if (_MSC_VER>=1300) || (__INTEL_COMPILER)
98     #pragma intrinsic(_mm_pause)
99     namespace tbb { namespace internal { namespace msvc_intrinsics {
pause(uintptr_t delay)100         static inline void pause (uintptr_t delay ) {
101             for (;delay>0; --delay )
102                 _mm_pause();
103         }
104     }}}
105     #define __TBB_Pause(V) tbb::internal::msvc_intrinsics::pause(V)
106     #define __TBB_SINGLE_PAUSE _mm_pause()
107 #else
108     #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
109         #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
110     #endif
111     namespace tbb { namespace internal { namespace msvc_inline_asm
112         static inline void pause (uintptr_t delay ) {
113             _asm
114             {
115                 mov __TBB_r(ax), delay
116               __TBB_L1:
117                 pause
118                 add __TBB_r(ax), -1
119                 jne __TBB_L1
120             }
121             return;
122         }
123     }}}
124     #define __TBB_Pause(V) tbb::internal::msvc_inline_asm::pause(V)
125     #define __TBB_SINGLE_PAUSE __asm pause
126 #endif
127 
128 #if (_MSC_VER>=1400 && !__INTEL_COMPILER) || (__INTEL_COMPILER>=1200)
129 // MSVC did not have this intrinsic prior to VC8.
130 // ICL 11.1 fails to compile a TBB example if __TBB_Log2 uses the intrinsic.
131     #pragma intrinsic(__TBB_W(_BitScanReverse))
132     namespace tbb { namespace internal { namespace msvc_intrinsics {
lg_bsr(uintptr_t i)133         static inline uintptr_t lg_bsr( uintptr_t i ){
134             unsigned long j;
135             __TBB_W(_BitScanReverse)( &j, i );
136             return j;
137         }
138     }}}
139     #define __TBB_Log2(V) tbb::internal::msvc_intrinsics::lg_bsr(V)
140 #else
141     #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
142         #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
143     #endif
144     namespace tbb { namespace internal { namespace msvc_inline_asm {
lg_bsr(uintptr_t i)145         static inline uintptr_t lg_bsr( uintptr_t i ){
146             uintptr_t j;
147             __asm
148             {
149                 bsr __TBB_r(ax), i
150                 mov j, __TBB_r(ax)
151             }
152             return j;
153         }
154     }}}
155     #define __TBB_Log2(V) tbb::internal::msvc_inline_asm::lg_bsr(V)
156 #endif
157 
158 #if _MSC_VER>=1400
159     #pragma intrinsic(__TBB_W(_InterlockedOr))
160     #pragma intrinsic(__TBB_W(_InterlockedAnd))
161     namespace tbb { namespace internal { namespace msvc_intrinsics {
lock_or(volatile void * operand,intptr_t addend)162         static inline void lock_or( volatile void *operand, intptr_t addend ){
163             __TBB_W(_InterlockedOr)((volatile word*)operand, addend);
164         }
lock_and(volatile void * operand,intptr_t addend)165         static inline void lock_and( volatile void *operand, intptr_t addend ){
166             __TBB_W(_InterlockedAnd)((volatile word*)operand, addend);
167         }
168     }}}
169     #define __TBB_AtomicOR(P,V)  tbb::internal::msvc_intrinsics::lock_or(P,V)
170     #define __TBB_AtomicAND(P,V) tbb::internal::msvc_intrinsics::lock_and(P,V)
171 #else
172     #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
173         #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
174     #endif
175     namespace tbb { namespace internal { namespace msvc_inline_asm {
lock_or(volatile void * operand,__int32 addend)176         static inline void lock_or( volatile void *operand, __int32 addend ) {
177             __asm
178             {
179                 mov eax, addend
180                 mov edx, [operand]
181                 lock or [edx], eax
182             }
183          }
lock_and(volatile void * operand,__int32 addend)184          static inline void lock_and( volatile void *operand, __int32 addend ) {
185             __asm
186             {
187                 mov eax, addend
188                 mov edx, [operand]
189                 lock and [edx], eax
190             }
191          }
192     }}}
193     #define __TBB_AtomicOR(P,V)  tbb::internal::msvc_inline_asm::lock_or(P,V)
194     #define __TBB_AtomicAND(P,V) tbb::internal::msvc_inline_asm::lock_and(P,V)
195 #endif
196 
197 #pragma intrinsic(__rdtsc)
198 namespace tbb { namespace internal { typedef uint64_t machine_tsc_t; } }
__TBB_machine_time_stamp()199 static inline tbb::internal::machine_tsc_t __TBB_machine_time_stamp() {
200     return __rdtsc();
201 }
202 #define __TBB_time_stamp() __TBB_machine_time_stamp()
203 
204 // API to retrieve/update FPU control setting
205 #define __TBB_CPU_CTL_ENV_PRESENT 1
206 
207 namespace tbb { namespace internal { class cpu_ctl_env; } }
208 #if __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
__TBB_get_cpu_ctl_env(tbb::internal::cpu_ctl_env * ctl)209     inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* ctl ) {
210         __asm {
211             __asm mov     __TBB_r(ax), ctl
212             __asm stmxcsr [__TBB_r(ax)]
213             __asm fstcw   [__TBB_r(ax)+4]
214         }
215     }
__TBB_set_cpu_ctl_env(const tbb::internal::cpu_ctl_env * ctl)216     inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* ctl ) {
217         __asm {
218             __asm mov     __TBB_r(ax), ctl
219             __asm ldmxcsr [__TBB_r(ax)]
220             __asm fldcw   [__TBB_r(ax)+4]
221         }
222     }
223 #else
224     extern "C" {
225         void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* );
226         void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* );
227     }
228 #endif
229 
230 namespace tbb {
231 namespace internal {
232 class cpu_ctl_env {
233 private:
234     int         mxcsr;
235     short       x87cw;
236     static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
237 public:
238     bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
get_env()239     void get_env() {
240         __TBB_get_cpu_ctl_env( this );
241         mxcsr &= MXCSR_CONTROL_MASK;
242     }
set_env()243     void set_env() const { __TBB_set_cpu_ctl_env( this ); }
244 };
245 } // namespace internal
246 } // namespace tbb
247 
248 #if !__TBB_WIN8UI_SUPPORT
249 extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
250 #define __TBB_Yield()  SwitchToThread()
251 #else
252 #include<thread>
253 #define __TBB_Yield()  std::this_thread::yield()
254 #endif
255 
256 #undef __TBB_r
257 #undef __TBB_W
258 #undef __TBB_word
259 
260 extern "C" {
261     __int8 __TBB_EXPORTED_FUNC __TBB_machine_try_lock_elided (volatile void* ptr);
262     void   __TBB_EXPORTED_FUNC __TBB_machine_unlock_elided (volatile void* ptr);
263 
264     // 'pause' instruction aborts HLE/RTM transactions
__TBB_machine_try_lock_elided_cancel()265     inline static void __TBB_machine_try_lock_elided_cancel() { __TBB_SINGLE_PAUSE; }
266 
267 #if __TBB_TSX_INTRINSICS_PRESENT
268     #define __TBB_machine_is_in_transaction _xtest
269     #define __TBB_machine_begin_transaction _xbegin
270     #define __TBB_machine_end_transaction   _xend
271     // The value (0xFF) below comes from the
272     // Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free
273     #define __TBB_machine_transaction_conflict_abort() _xabort(0xFF)
274 #else
275     __int8           __TBB_EXPORTED_FUNC __TBB_machine_is_in_transaction();
276     unsigned __int32 __TBB_EXPORTED_FUNC __TBB_machine_begin_transaction();
277     void             __TBB_EXPORTED_FUNC __TBB_machine_end_transaction();
278     void             __TBB_EXPORTED_FUNC __TBB_machine_transaction_conflict_abort();
279 #endif /* __TBB_TSX_INTRINSICS_PRESENT */
280 }
281