/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/
16
17 #if !defined(__TBB_machine_H) || defined(__TBB_machine_msvc_ia32_common_H)
18 #error Do not #include this internal file directly; use public TBB headers instead.
19 #endif
20
21 #define __TBB_machine_msvc_ia32_common_H
22
23 #include <intrin.h>
24
25 //TODO: consider moving this macro to tbb_config.h and using where MSVC asm is used
26 #if !_M_X64 || __INTEL_COMPILER
27 #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 1
28 #else
29 //MSVC in x64 mode does not accept inline assembler
30 #define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 0
31 #define __TBB_NO_X86_MSVC_INLINE_ASM_MSG "The compiler being used is not supported (outdated?)"
32 #endif
33
// Per-architecture helpers used by the rest of this header:
//   __TBB_r(x)  - spells the native general-purpose register name (rax vs. eax)
//   __TBB_W(f)  - picks the word-sized intrinsic name (f64 vs. f)
//   word        - the operand type matching the _Interlocked* word-sized intrinsics
#if _M_X64
#define __TBB_r(reg_name) r##reg_name
#define __TBB_W(name) name##64
namespace tbb { namespace internal { namespace msvc_intrinsics {
    typedef __int64 word;
}}}
#else
#define __TBB_r(reg_name) e##reg_name
#define __TBB_W(name) name
namespace tbb { namespace internal { namespace msvc_intrinsics {
    typedef long word;
}}}
#endif
47
#if __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT
// Generates the three TBB atomic primitives (compare-and-swap, fetch-and-add,
// fetch-and-store) for one operand size on top of the compiler's _Interlocked*
// intrinsics, forcing the intrinsic form with __pragma(intrinsic).
// S is the operand size in bytes, B is the suffix for intrinsics for that size
#define __TBB_MACHINE_DEFINE_ATOMICS(S,B,T,U)                                               \
    __pragma(intrinsic( _InterlockedCompareExchange##B ))                                   \
    static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \
        return _InterlockedCompareExchange##B ( (T*)ptr, value, comparand );                \
    }                                                                                       \
    __pragma(intrinsic( _InterlockedExchangeAdd##B ))                                       \
    static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) {           \
        return _InterlockedExchangeAdd##B ( (T*)ptr, addend );                              \
    }                                                                                       \
    __pragma(intrinsic( _InterlockedExchange##B ))                                          \
    static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) {          \
        return _InterlockedExchange##B ( (T*)ptr, value );                                  \
    }

// Atomic intrinsics for 1, 2, and 4 bytes are available for x86 & x64
__TBB_MACHINE_DEFINE_ATOMICS(1,8,char,__int8)
__TBB_MACHINE_DEFINE_ATOMICS(2,16,short,__int16)
__TBB_MACHINE_DEFINE_ATOMICS(4,,long,__int32)

#if __TBB_WORDSIZE==8
// 8-byte atomics are generated only for 64-bit targets.
__TBB_MACHINE_DEFINE_ATOMICS(8,64,__int64,__int64)
#endif

#undef __TBB_MACHINE_DEFINE_ATOMICS
#endif /* __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT */
75
76 #if _MSC_VER>=1300 || __INTEL_COMPILER>=1100
77 #pragma intrinsic(_ReadWriteBarrier)
78 #pragma intrinsic(_mm_mfence)
79 #define __TBB_compiler_fence() _ReadWriteBarrier()
80 #define __TBB_full_memory_fence() _mm_mfence()
81 #elif __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
82 #define __TBB_compiler_fence() __asm { __asm nop }
83 #define __TBB_full_memory_fence() __asm { __asm mfence }
84 #else
85 #error Unsupported compiler; define __TBB_{control,acquire,release}_consistency_helper to support it
86 #endif
87
88 #define __TBB_control_consistency_helper() __TBB_compiler_fence()
89 #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
90 #define __TBB_release_consistency_helper() __TBB_compiler_fence()
91
92 #if (_MSC_VER>=1300) || (__INTEL_COMPILER)
93 #pragma intrinsic(_mm_pause)
94 namespace tbb { namespace internal { namespace msvc_intrinsics {
pause(uintptr_t delay)95 static inline void pause (uintptr_t delay ) {
96 for (;delay>0; --delay )
97 _mm_pause();
98 }
99 }}}
100 #define __TBB_Pause(V) tbb::internal::msvc_intrinsics::pause(V)
101 #define __TBB_SINGLE_PAUSE _mm_pause()
102 #else
103 #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
104 #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
105 #endif
106 namespace tbb { namespace internal { namespace msvc_inline_asm
107 static inline void pause (uintptr_t delay ) {
108 _asm
109 {
110 mov __TBB_r(ax), delay
111 __TBB_L1:
112 pause
113 add __TBB_r(ax), -1
114 jne __TBB_L1
115 }
116 return;
117 }
118 }}}
119 #define __TBB_Pause(V) tbb::internal::msvc_inline_asm::pause(V)
120 #define __TBB_SINGLE_PAUSE __asm pause
121 #endif
122
123 #if (_MSC_VER>=1400 && !__INTEL_COMPILER) || (__INTEL_COMPILER>=1200)
124 // MSVC did not have this intrinsic prior to VC8.
125 // ICL 11.1 fails to compile a TBB example if __TBB_Log2 uses the intrinsic.
126 #pragma intrinsic(__TBB_W(_BitScanReverse))
127 namespace tbb { namespace internal { namespace msvc_intrinsics {
lg_bsr(uintptr_t i)128 static inline uintptr_t lg_bsr( uintptr_t i ){
129 unsigned long j;
130 __TBB_W(_BitScanReverse)( &j, i );
131 return j;
132 }
133 }}}
134 #define __TBB_Log2(V) tbb::internal::msvc_intrinsics::lg_bsr(V)
135 #else
136 #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
137 #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
138 #endif
139 namespace tbb { namespace internal { namespace msvc_inline_asm {
lg_bsr(uintptr_t i)140 static inline uintptr_t lg_bsr( uintptr_t i ){
141 uintptr_t j;
142 __asm
143 {
144 bsr __TBB_r(ax), i
145 mov j, __TBB_r(ax)
146 }
147 return j;
148 }
149 }}}
150 #define __TBB_Log2(V) tbb::internal::msvc_inline_asm::lg_bsr(V)
151 #endif
152
153 #if _MSC_VER>=1400
154 #pragma intrinsic(__TBB_W(_InterlockedOr))
155 #pragma intrinsic(__TBB_W(_InterlockedAnd))
156 namespace tbb { namespace internal { namespace msvc_intrinsics {
lock_or(volatile void * operand,intptr_t addend)157 static inline void lock_or( volatile void *operand, intptr_t addend ){
158 __TBB_W(_InterlockedOr)((volatile word*)operand, addend);
159 }
lock_and(volatile void * operand,intptr_t addend)160 static inline void lock_and( volatile void *operand, intptr_t addend ){
161 __TBB_W(_InterlockedAnd)((volatile word*)operand, addend);
162 }
163 }}}
164 #define __TBB_AtomicOR(P,V) tbb::internal::msvc_intrinsics::lock_or(P,V)
165 #define __TBB_AtomicAND(P,V) tbb::internal::msvc_intrinsics::lock_and(P,V)
166 #else
167 #if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
168 #error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
169 #endif
170 namespace tbb { namespace internal { namespace msvc_inline_asm {
lock_or(volatile void * operand,__int32 addend)171 static inline void lock_or( volatile void *operand, __int32 addend ) {
172 __asm
173 {
174 mov eax, addend
175 mov edx, [operand]
176 lock or [edx], eax
177 }
178 }
lock_and(volatile void * operand,__int32 addend)179 static inline void lock_and( volatile void *operand, __int32 addend ) {
180 __asm
181 {
182 mov eax, addend
183 mov edx, [operand]
184 lock and [edx], eax
185 }
186 }
187 }}}
188 #define __TBB_AtomicOR(P,V) tbb::internal::msvc_inline_asm::lock_or(P,V)
189 #define __TBB_AtomicAND(P,V) tbb::internal::msvc_inline_asm::lock_and(P,V)
190 #endif
191
192 #pragma intrinsic(__rdtsc)
193 namespace tbb { namespace internal { typedef uint64_t machine_tsc_t; } }
__TBB_machine_time_stamp()194 static inline tbb::internal::machine_tsc_t __TBB_machine_time_stamp() {
195 return __rdtsc();
196 }
197 #define __TBB_time_stamp() __TBB_machine_time_stamp()
198
199 // API to retrieve/update FPU control setting
200 #define __TBB_CPU_CTL_ENV_PRESENT 1
201
202 namespace tbb { namespace internal { class cpu_ctl_env; } }
203 #if __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
__TBB_get_cpu_ctl_env(tbb::internal::cpu_ctl_env * ctl)204 inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* ctl ) {
205 __asm {
206 __asm mov __TBB_r(ax), ctl
207 __asm stmxcsr [__TBB_r(ax)]
208 __asm fstcw [__TBB_r(ax)+4]
209 }
210 }
__TBB_set_cpu_ctl_env(const tbb::internal::cpu_ctl_env * ctl)211 inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* ctl ) {
212 __asm {
213 __asm mov __TBB_r(ax), ctl
214 __asm ldmxcsr [__TBB_r(ax)]
215 __asm fldcw [__TBB_r(ax)+4]
216 }
217 }
218 #else
219 extern "C" {
220 void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* );
221 void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* );
222 }
223 #endif
224
225 namespace tbb {
226 namespace internal {
227 class cpu_ctl_env {
228 private:
229 int mxcsr;
230 short x87cw;
231 static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
232 public:
233 bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
get_env()234 void get_env() {
235 __TBB_get_cpu_ctl_env( this );
236 mxcsr &= MXCSR_CONTROL_MASK;
237 }
set_env()238 void set_env() const { __TBB_set_cpu_ctl_env( this ); }
239 };
240 } // namespace internal
241 } // namespace tbb
242
243 #if !__TBB_WIN8UI_SUPPORT
244 extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
245 #define __TBB_Yield() SwitchToThread()
246 #else
247 #include<thread>
248 #define __TBB_Yield() std::this_thread::yield()
249 #endif
250
251 #undef __TBB_r
252 #undef __TBB_W
253 #undef __TBB_word
254
255 extern "C" {
256 __int8 __TBB_EXPORTED_FUNC __TBB_machine_try_lock_elided (volatile void* ptr);
257 void __TBB_EXPORTED_FUNC __TBB_machine_unlock_elided (volatile void* ptr);
258
259 // 'pause' instruction aborts HLE/RTM transactions
__TBB_machine_try_lock_elided_cancel()260 inline static void __TBB_machine_try_lock_elided_cancel() { __TBB_SINGLE_PAUSE; }
261
262 #if __TBB_TSX_INTRINSICS_PRESENT
263 #define __TBB_machine_is_in_transaction _xtest
264 #define __TBB_machine_begin_transaction _xbegin
265 #define __TBB_machine_end_transaction _xend
266 // The value (0xFF) below comes from the
267 // Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free
268 #define __TBB_machine_transaction_conflict_abort() _xabort(0xFF)
269 #else
270 __int8 __TBB_EXPORTED_FUNC __TBB_machine_is_in_transaction();
271 unsigned __int32 __TBB_EXPORTED_FUNC __TBB_machine_begin_transaction();
272 void __TBB_EXPORTED_FUNC __TBB_machine_end_transaction();
273 void __TBB_EXPORTED_FUNC __TBB_machine_transaction_conflict_abort();
274 #endif /* __TBB_TSX_INTRINSICS_PRESENT */
275 }
276