1 /*
2 Copyright (c) 2005-2017 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15
16
17
18
19 */
20
21 #if !defined(__TBB_machine_H) || defined(__TBB_machine_msvc_ia32_common_H)
22 #error Do not #include this internal file directly; use public TBB headers instead.
23 #endif
24
25 #define __TBB_machine_msvc_ia32_common_H
26
27 #include <intrin.h>
28
29 //TODO: consider moving this macro to tbb_config.h and using where MSVC asm is used
30 #if !_M_X64 || __INTEL_COMPILER
31 #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 1
32 #else
33 //MSVC in x64 mode does not accept inline assembler
34 #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 0
35 #define __TBB_NO_X86_MSVC_INLINE_ASM_MSG "The compiler being used is not supported (outdated?)"
36 #endif
37
// Width-dispatch helpers used throughout this header:
//   __TBB_r(ax) expands to the native accumulator register name (rax vs eax);
//   __TBB_W(name) selects the 64-bit intrinsic variant (name64) on x64;
//   msvc_intrinsics::word is the native-width integer the OR/AND intrinsics take.
#if _M_X64
    #define __TBB_r(reg_name) r##reg_name
    #define __TBB_W(name) name##64
    namespace tbb { namespace internal { namespace msvc_intrinsics {
        typedef __int64 word;
    }}}
#else
    #define __TBB_r(reg_name) e##reg_name
    #define __TBB_W(name) name
    namespace tbb { namespace internal { namespace msvc_intrinsics {
        typedef long word;
    }}}
#endif
51
#if _MSC_VER>=1600 && (!__INTEL_COMPILER || __INTEL_COMPILER>=1310)
// Define TBB's CAS / fetch-add / fetch-store primitives on top of the
// MSVC _Interlocked* intrinsics (all are full-fence operations).
// S is the operand size in bytes, B is the suffix for intrinsics for that size
#define __TBB_MACHINE_DEFINE_ATOMICS(S,B,T,U)                                          \
__pragma(intrinsic( _InterlockedCompareExchange##B ))                                  \
static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \
    return _InterlockedCompareExchange##B ( (T*)ptr, value, comparand );               \
}                                                                                      \
__pragma(intrinsic( _InterlockedExchangeAdd##B ))                                      \
static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) {          \
    return _InterlockedExchangeAdd##B ( (T*)ptr, addend );                             \
}                                                                                      \
__pragma(intrinsic( _InterlockedExchange##B ))                                         \
static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) {         \
    return _InterlockedExchange##B ( (T*)ptr, value );                                 \
}

// Atomic intrinsics for 1, 2, and 4 bytes are available for x86 & x64
__TBB_MACHINE_DEFINE_ATOMICS(1,8,char,__int8)
__TBB_MACHINE_DEFINE_ATOMICS(2,16,short,__int16)
__TBB_MACHINE_DEFINE_ATOMICS(4,,long,__int32)

#if __TBB_WORDSIZE==8
// 8-byte interlocked intrinsics are only usable when building 64-bit.
__TBB_MACHINE_DEFINE_ATOMICS(8,64,__int64,__int64)
#endif

#undef __TBB_MACHINE_DEFINE_ATOMICS
#define __TBB_ATOMIC_PRIMITIVES_DEFINED
#endif /*_MSC_VER>=1600*/
80
81 #if _MSC_VER>=1300 || __INTEL_COMPILER>=1100
82 #pragma intrinsic(_ReadWriteBarrier)
83 #pragma intrinsic(_mm_mfence)
84 #define __TBB_compiler_fence() _ReadWriteBarrier()
85 #define __TBB_full_memory_fence() _mm_mfence()
86 #elif __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
87 #define __TBB_compiler_fence() __asm { __asm nop }
88 #define __TBB_full_memory_fence() __asm { __asm mfence }
89 #else
90 #error Unsupported compiler; define __TBB_{control,acquire,release}_consistency_helper to support it
91 #endif
92
93 #define __TBB_control_consistency_helper() __TBB_compiler_fence()
94 #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
95 #define __TBB_release_consistency_helper() __TBB_compiler_fence()
96
97 #if (_MSC_VER>=1300) || (__INTEL_COMPILER)
98 #pragma intrinsic(_mm_pause)
99 namespace tbb { namespace internal { namespace msvc_intrinsics {
pause(uintptr_t delay)100 static inline void pause (uintptr_t delay ) {
101 for (;delay>0; --delay )
102 _mm_pause();
103 }
104 }}}
105 #define __TBB_Pause(V) tbb::internal::msvc_intrinsics::pause(V)
106 #define __TBB_SINGLE_PAUSE _mm_pause()
107 #else
108 #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
109 #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
110 #endif
111 namespace tbb { namespace internal { namespace msvc_inline_asm
112 static inline void pause (uintptr_t delay ) {
113 _asm
114 {
115 mov __TBB_r(ax), delay
116 __TBB_L1:
117 pause
118 add __TBB_r(ax), -1
119 jne __TBB_L1
120 }
121 return;
122 }
123 }}}
124 #define __TBB_Pause(V) tbb::internal::msvc_inline_asm::pause(V)
125 #define __TBB_SINGLE_PAUSE __asm pause
126 #endif
127
128 #if (_MSC_VER>=1400 && !__INTEL_COMPILER) || (__INTEL_COMPILER>=1200)
129 // MSVC did not have this intrinsic prior to VC8.
130 // ICL 11.1 fails to compile a TBB example if __TBB_Log2 uses the intrinsic.
131 #pragma intrinsic(__TBB_W(_BitScanReverse))
132 namespace tbb { namespace internal { namespace msvc_intrinsics {
lg_bsr(uintptr_t i)133 static inline uintptr_t lg_bsr( uintptr_t i ){
134 unsigned long j;
135 __TBB_W(_BitScanReverse)( &j, i );
136 return j;
137 }
138 }}}
139 #define __TBB_Log2(V) tbb::internal::msvc_intrinsics::lg_bsr(V)
140 #else
141 #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
142 #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
143 #endif
144 namespace tbb { namespace internal { namespace msvc_inline_asm {
lg_bsr(uintptr_t i)145 static inline uintptr_t lg_bsr( uintptr_t i ){
146 uintptr_t j;
147 __asm
148 {
149 bsr __TBB_r(ax), i
150 mov j, __TBB_r(ax)
151 }
152 return j;
153 }
154 }}}
155 #define __TBB_Log2(V) tbb::internal::msvc_inline_asm::lg_bsr(V)
156 #endif
157
158 #if _MSC_VER>=1400
159 #pragma intrinsic(__TBB_W(_InterlockedOr))
160 #pragma intrinsic(__TBB_W(_InterlockedAnd))
161 namespace tbb { namespace internal { namespace msvc_intrinsics {
lock_or(volatile void * operand,intptr_t addend)162 static inline void lock_or( volatile void *operand, intptr_t addend ){
163 __TBB_W(_InterlockedOr)((volatile word*)operand, addend);
164 }
lock_and(volatile void * operand,intptr_t addend)165 static inline void lock_and( volatile void *operand, intptr_t addend ){
166 __TBB_W(_InterlockedAnd)((volatile word*)operand, addend);
167 }
168 }}}
169 #define __TBB_AtomicOR(P,V) tbb::internal::msvc_intrinsics::lock_or(P,V)
170 #define __TBB_AtomicAND(P,V) tbb::internal::msvc_intrinsics::lock_and(P,V)
171 #else
172 #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
173 #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
174 #endif
175 namespace tbb { namespace internal { namespace msvc_inline_asm {
lock_or(volatile void * operand,__int32 addend)176 static inline void lock_or( volatile void *operand, __int32 addend ) {
177 __asm
178 {
179 mov eax, addend
180 mov edx, [operand]
181 lock or [edx], eax
182 }
183 }
lock_and(volatile void * operand,__int32 addend)184 static inline void lock_and( volatile void *operand, __int32 addend ) {
185 __asm
186 {
187 mov eax, addend
188 mov edx, [operand]
189 lock and [edx], eax
190 }
191 }
192 }}}
193 #define __TBB_AtomicOR(P,V) tbb::internal::msvc_inline_asm::lock_or(P,V)
194 #define __TBB_AtomicAND(P,V) tbb::internal::msvc_inline_asm::lock_and(P,V)
195 #endif
196
197 #pragma intrinsic(__rdtsc)
198 namespace tbb { namespace internal { typedef uint64_t machine_tsc_t; } }
__TBB_machine_time_stamp()199 static inline tbb::internal::machine_tsc_t __TBB_machine_time_stamp() {
200 return __rdtsc();
201 }
202 #define __TBB_time_stamp() __TBB_machine_time_stamp()
203
204 // API to retrieve/update FPU control setting
205 #define __TBB_CPU_CTL_ENV_PRESENT 1
206
207 namespace tbb { namespace internal { class cpu_ctl_env; } }
208 #if __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
__TBB_get_cpu_ctl_env(tbb::internal::cpu_ctl_env * ctl)209 inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* ctl ) {
210 __asm {
211 __asm mov __TBB_r(ax), ctl
212 __asm stmxcsr [__TBB_r(ax)]
213 __asm fstcw [__TBB_r(ax)+4]
214 }
215 }
__TBB_set_cpu_ctl_env(const tbb::internal::cpu_ctl_env * ctl)216 inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* ctl ) {
217 __asm {
218 __asm mov __TBB_r(ax), ctl
219 __asm ldmxcsr [__TBB_r(ax)]
220 __asm fldcw [__TBB_r(ax)+4]
221 }
222 }
223 #else
224 extern "C" {
225 void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* );
226 void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* );
227 }
228 #endif
229
230 namespace tbb {
231 namespace internal {
232 class cpu_ctl_env {
233 private:
234 int mxcsr;
235 short x87cw;
236 static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
237 public:
238 bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
get_env()239 void get_env() {
240 __TBB_get_cpu_ctl_env( this );
241 mxcsr &= MXCSR_CONTROL_MASK;
242 }
set_env()243 void set_env() const { __TBB_set_cpu_ctl_env( this ); }
244 };
245 } // namespace internal
246 } // namespace tbb
247
248 #if !__TBB_WIN8UI_SUPPORT
249 extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
250 #define __TBB_Yield() SwitchToThread()
251 #else
252 #include<thread>
253 #define __TBB_Yield() std::this_thread::yield()
254 #endif
255
256 #undef __TBB_r
257 #undef __TBB_W
258 #undef __TBB_word
259
260 extern "C" {
261 __int8 __TBB_EXPORTED_FUNC __TBB_machine_try_lock_elided (volatile void* ptr);
262 void __TBB_EXPORTED_FUNC __TBB_machine_unlock_elided (volatile void* ptr);
263
264 // 'pause' instruction aborts HLE/RTM transactions
__TBB_machine_try_lock_elided_cancel()265 inline static void __TBB_machine_try_lock_elided_cancel() { __TBB_SINGLE_PAUSE; }
266
267 #if __TBB_TSX_INTRINSICS_PRESENT
268 #define __TBB_machine_is_in_transaction _xtest
269 #define __TBB_machine_begin_transaction _xbegin
270 #define __TBB_machine_end_transaction _xend
271 // The value (0xFF) below comes from the
272 // Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free
273 #define __TBB_machine_transaction_conflict_abort() _xabort(0xFF)
274 #else
275 __int8 __TBB_EXPORTED_FUNC __TBB_machine_is_in_transaction();
276 unsigned __int32 __TBB_EXPORTED_FUNC __TBB_machine_begin_transaction();
277 void __TBB_EXPORTED_FUNC __TBB_machine_end_transaction();
278 void __TBB_EXPORTED_FUNC __TBB_machine_transaction_conflict_abort();
279 #endif /* __TBB_TSX_INTRINSICS_PRESENT */
280 }
281